# HG changeset patch # User František Kučera # Date 1547224916 -3600 # Node ID 14e14a5db027ec48d867e58e365ef89f801a6848 # Parent 85741b08db48d6715d3041cc857bbbdd6a92851e lossless bidirectional XML conversion of relational data Both generate same data: relpipe-in-cli demo | relpipe-out-xml | relpipe-in-xml | sha512sum relpipe-in-cli demo | sha512sum diff -r 85741b08db48 -r 14e14a5db027 src/XMLCommand.h --- a/src/XMLCommand.h Fri Jan 11 16:20:25 2019 +0100 +++ b/src/XMLCommand.h Fri Jan 11 17:41:56 2019 +0100 @@ -1,6 +1,6 @@ /** * Relational pipes - * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info) + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -46,96 +47,80 @@ class RelpipeSaxHandler : public xercesc::DefaultHandler { private: + const wstring XMLNS = L"tag:globalcode.info,2018:relpipe"; unique_ptr writer; XercesStringConvertor xConvertor; + wstring currentRelationName; + vector currentAttributes; + wstringstream currentValue; + + void resetCurrentValue() { + currentValue.str(L""); + currentValue.clear(); + } + + string_t getAttributeName(const Attributes& attrs, string_t uri, string_t localname) { + // TODO: less string conversions, better performance + XMLCh* xUri = xConvertor.toXercesString(uri); + XMLCh* xLocalName = xConvertor.toXercesString(localname); + string_t value = xConvertor.toString(attrs.getValue(xUri, xLocalName)); + XMLString::release(&xUri); + XMLString::release(&xLocalName); + return value; + } + + void startElement(const string_t uri, const string_t localname, const string_t qname, const Attributes& attrs) { + if (uri == XMLNS) { + if (localname == L"name") { + resetCurrentValue(); + } else if (localname == L"attributes-metadata") { + currentAttributes.clear(); + } else if (localname == L"attribute-metadata") { + AttributeMetadata am; + am.attributeName = getAttributeName(attrs, L"", L"name"); + am.typeId = writer->toTypeId(getAttributeName(attrs, L"", L"type")); + currentAttributes.push_back(am); + } else if (localname == L"attribute") { + resetCurrentValue(); + } + } + } + + void endElement(const string_t uri, const string_t localname, const string_t qname) { + if (uri == XMLNS) { + if (localname == L"name") { + currentRelationName = currentValue.str(); + } else if (localname == L"attributes-metadata") { + writer->startRelation(currentRelationName, currentAttributes, true); + } else if (localname == L"attribute") { + writer->writeAttribute(currentValue.str()); + } + } + } + + void characters(const string_t chars) { + currentValue << chars.c_str(); + } public: RelpipeSaxHandler(std::ostream& output) : DefaultHandler(), writer(Factory::create(output)) { } - void startDocument() override { - //XMLString:: - // TODO: remove demo - writer->startRelation(L"xml",{ - {L"event", TypeId::STRING}, - {L"uri", TypeId::STRING}, - {L"localname", TypeId::STRING}, - {L"qname", TypeId::STRING}, - {L"chars", TypeId::STRING} - }, true); + virtual ~RelpipeSaxHandler() { + } void startElement(const XMLCh * const uri, const XMLCh * const localname, const XMLCh * const qname, const Attributes& attrs) override { - writer->writeAttribute(L"startElement"); - writer->writeAttribute(xConvertor.toString(uri)); - writer->writeAttribute(xConvertor.toString(localname)); - writer->writeAttribute(xConvertor.toString(qname)); - writer->writeAttribute(L""); - - for (int i = 0; i < attrs.getLength(); i++) { - writer->writeAttribute(L"attribute"); - writer->writeAttribute(xConvertor.toString(attrs.getURI(i))); - writer->writeAttribute(xConvertor.toString(attrs.getLocalName(i))); - writer->writeAttribute(xConvertor.toString(attrs.getQName(i))); - writer->writeAttribute(xConvertor.toString(attrs.getValue(i))); - } + startElement(xConvertor.toString(uri), xConvertor.toString(localname), xConvertor.toString(qname), attrs); } void endElement(const XMLCh * const uri, const XMLCh * const localname, const XMLCh * const qname) override { - writer->writeAttribute(L"endElement"); - writer->writeAttribute(xConvertor.toString(uri)); - writer->writeAttribute(xConvertor.toString(localname)); - writer->writeAttribute(xConvertor.toString(qname)); - writer->writeAttribute(L""); + endElement(xConvertor.toString(uri), xConvertor.toString(localname), xConvertor.toString(qname)); } void characters(const XMLCh * const chars, const XMLSize_t length) override { - writer->writeAttribute(L"characters"); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(xConvertor.toString(chars)); - } - - void comment(const XMLCh * const chars, const XMLSize_t length) override { - writer->writeAttribute(L"comment"); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(xConvertor.toString(chars)); - } - - void startCDATA() override { - writer->writeAttribute(L"startCDATA"); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - } - - void endCDATA() override { - writer->writeAttribute(L"endCDATA"); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - } - - void processingInstruction(const XMLCh * const target, const XMLCh * const data) override { - writer->writeAttribute(L"processingInstruction"); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(xConvertor.toString(target)); - writer->writeAttribute(xConvertor.toString(data)); - } - - void endDocument() override { - writer->writeAttribute(L"endDocument"); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); - writer->writeAttribute(L""); + characters(xConvertor.toString(chars)); } }; @@ -154,7 +139,6 @@ RelpipeSaxHandler saxHandler(output); parser->setContentHandler(&saxHandler); - parser->setLexicalHandler(&saxHandler); // TODO: remove – needed only for comments parser->setErrorHandler(&saxHandler); StreamInputSource inputSource(input); diff -r 85741b08db48 -r 14e14a5db027 src/XercesStringConvertor.h --- a/src/XercesStringConvertor.h Fri Jan 11 16:20:25 2019 +0100 +++ b/src/XercesStringConvertor.h Fri Jan 11 17:41:56 2019 +0100 @@ -38,7 +38,7 @@ public: - string_t toString(const XMLCh * const chars) { + string_t toString(const XMLCh* const chars) { // XMLCh = char16_t // „All XML data is handled within Xerces-C++ as strings of XMLCh characters. Regardless of the size of the type chosen, the data stored in variables of type XMLCh will always be utf-16 encoded values.“ // see https://xerces.apache.org/xerces-c/program-others-3.html @@ -50,6 +50,14 @@ XMLString::release(&x); return convertor.from_bytes(s); } + + /** + * @param string + * @return Xerces string. Must be released manually after use, see XMLString::release(). + */ + XMLCh* toXercesString(string_t string) { + return XMLString::transcode(convertor.to_bytes(string).c_str()); + } }; } diff -r 85741b08db48 -r 14e14a5db027 src/relpipe-in-xml.cpp --- a/src/relpipe-in-xml.cpp Fri Jan 11 16:20:25 2019 +0100 +++ b/src/relpipe-in-xml.cpp Fri Jan 11 17:41:56 2019 +0100 @@ -1,6 +1,6 @@ /** * Relational pipes - * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info) + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by