diff -r 000000000000 -r ea26b3359fed src/XMLTableCommand.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/XMLTableCommand.h Sat Jan 16 16:36:39 2021 +0100 @@ -0,0 +1,137 @@ +/** + * Relational pipes + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "Configuration.h" +#include "XMLDocumentConstructor.h" + +namespace relpipe { +namespace in { +namespace xmltable { + +using namespace relpipe::writer; + +class XMLCommand { +private: + std::wstring_convert> convertor; // TODO: support also other encodings. + + string_t formatRawXML(string_t rawXML) { + std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); + return std::regex_replace(rawXML, pattern, L""); + } + + void importNode(xmlpp::Node* parent, xmlpp::Node* child, AttributeRecipe attributeRecipe) { + if (dynamic_cast (child)) parent->add_child_with_new_ns( + convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), + convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), + convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); + else parent->import_node(child, true); + } + + void importNode(xmlpp::Document* document, xmlpp::Node* child, AttributeRecipe attributeRecipe) { + if (dynamic_cast (child)) document->create_root_node( + convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), + convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), + convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); + else document->create_root_node_by_import(child, true); + } + + string_t toRawXML(xmlpp::Node* parent, AttributeRecipe attributeRecipe, xmlpp::Node::PrefixNsMap ns) { + xmlpp::Document d; + xmlpp::NodeSet nodes = parent->find(convertor.to_bytes(attributeRecipe.xpath), ns); + + if (attributeRecipe.rawXmlNodeListWrapper.name.size()) { + d.create_root_node( + convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.name), + convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.uri), + convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.prefix)); + for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node, attributeRecipe); + } else { + if (nodes.size() == 1) importNode(&d, nodes[0], attributeRecipe); + else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception + else return L""; // TODO: null + } + return formatRawXML(convertor.from_bytes(d.write_to_string())); + } + +public: + + void process(std::istream& input, std::ostream& output, Configuration& configuration) { + std::shared_ptr writer(Factory::create(output)); + + xmlpp::DomParser parser; + XMLDocumentConstructor documentConstructor(&input, &parser); + for (ParserOptionRecipe o : configuration.parserOptions) documentConstructor.setOption(convertor.to_bytes(o.uri), convertor.to_bytes(o.value)); + documentConstructor.process(); + if (configuration.xinclude) parser.get_document()->process_xinclude(true); + xmlpp::Element* root = parser.get_document()->get_root_node(); + + xmlpp::Node::PrefixNsMap ns; + for (int i = 0; i < configuration.namespaceMappings.size(); i++) { + std::string prefix = convertor.to_bytes(configuration.namespaceMappings[i]); + std::string uri = convertor.to_bytes(configuration.namespaceMappings[++i]); + ns[prefix] = uri; + } + + for (const RelationConfiguration& r : configuration.relationConfigurations) { + std::vector attributesMetadata; + for (AttributeRecipe a : r.attributes) attributesMetadata.push_back(AttributeMetadata{a.name, a.type}); + relpipe::writer::string_t name = r.nameIsXPath ? convertor.from_bytes(root->eval_to_string(convertor.to_bytes(r.relation), ns)) : r.relation; + writer->startRelation(name, attributesMetadata, true); + for (xmlpp::Node* n : root->find(convertor.to_bytes(r.xpath), ns)) { + for (AttributeRecipe a : r.attributes) { + // TODO: convert to bytes only once + std::string attributeXpath = convertor.to_bytes(a.xpath); + if (a.mode == Mode::STRING) { + writer->writeAttribute(convertor.from_bytes(n->eval_to_string(attributeXpath, ns))); + } else if (a.mode == Mode::BOOLEAN) { + writer->writeAttribute(n->eval_to_boolean(attributeXpath, ns) ? L"true" : L"false"); + } else if (a.mode == Mode::LINE_NUMBER) { + xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); + string_t line = attributeNodes.size() ? std::to_wstring(attributeNodes[0]->get_line()) : L""; // TODO: null + writer->writeAttribute(line); + } else if (a.mode == Mode::XPATH) { + xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); + string_t line = attributeNodes.size() ? convertor.from_bytes(attributeNodes[0]->get_path()) : L""; // TODO: null + writer->writeAttribute(line); + } else if (a.mode == Mode::RAW_XML) { + writer->writeAttribute(toRawXML(n, a, ns)); + } else { + throw logic_error("Unsupported mode."); // should never happer, TODO: better relpipe exception + } + } + } + } + } +}; + +} +} +}