|
1 /** |
|
2 * Relational pipes |
|
3 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 * |
|
5 * This program is free software: you can redistribute it and/or modify |
|
6 * it under the terms of the GNU General Public License as published by |
|
7 * the Free Software Foundation, version 3 of the License. |
|
8 * |
|
9 * This program is distributed in the hope that it will be useful, |
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 * GNU General Public License for more details. |
|
13 * |
|
14 * You should have received a copy of the GNU General Public License |
|
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 */ |
|
17 #pragma once |
|
18 |
|
#include <algorithm>
#include <codecvt>
#include <cstddef>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <locale>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <libxml++-2.6/libxml++/libxml++.h>

#include <relpipe/writer/typedefs.h>

#include "Configuration.h"
#include "XMLDocumentConstructor.h"
|
34 |
|
35 namespace relpipe { |
|
36 namespace in { |
|
37 namespace xmltable { |
|
38 |
|
39 using namespace relpipe::writer; |
|
40 |
|
41 class XMLCommand { |
|
42 private: |
|
43 std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings. |
|
44 |
|
45 string_t formatRawXML(string_t rawXML) { |
|
46 std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); |
|
47 return std::regex_replace(rawXML, pattern, L""); |
|
48 } |
|
49 |
|
50 void importNode(xmlpp::Node* parent, xmlpp::Node* child, AttributeRecipe attributeRecipe) { |
|
51 if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns( |
|
52 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), |
|
53 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), |
|
54 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); |
|
55 else parent->import_node(child, true); |
|
56 } |
|
57 |
|
58 void importNode(xmlpp::Document* document, xmlpp::Node* child, AttributeRecipe attributeRecipe) { |
|
59 if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node( |
|
60 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), |
|
61 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), |
|
62 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); |
|
63 else document->create_root_node_by_import(child, true); |
|
64 } |
|
65 |
|
66 string_t toRawXML(xmlpp::Node* parent, AttributeRecipe attributeRecipe, xmlpp::Node::PrefixNsMap ns) { |
|
67 xmlpp::Document d; |
|
68 xmlpp::NodeSet nodes = parent->find(convertor.to_bytes(attributeRecipe.xpath), ns); |
|
69 |
|
70 if (attributeRecipe.rawXmlNodeListWrapper.name.size()) { |
|
71 d.create_root_node( |
|
72 convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.name), |
|
73 convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.uri), |
|
74 convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.prefix)); |
|
75 for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node, attributeRecipe); |
|
76 } else { |
|
77 if (nodes.size() == 1) importNode(&d, nodes[0], attributeRecipe); |
|
78 else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception |
|
79 else return L""; // TODO: null |
|
80 } |
|
81 return formatRawXML(convertor.from_bytes(d.write_to_string())); |
|
82 } |
|
83 |
|
84 public: |
|
85 |
|
86 void process(std::istream& input, std::ostream& output, Configuration& configuration) { |
|
87 std::shared_ptr<RelationalWriter> writer(Factory::create(output)); |
|
88 |
|
89 xmlpp::DomParser parser; |
|
90 XMLDocumentConstructor documentConstructor(&input, &parser); |
|
91 for (ParserOptionRecipe o : configuration.parserOptions) documentConstructor.setOption(convertor.to_bytes(o.uri), convertor.to_bytes(o.value)); |
|
92 documentConstructor.process(); |
|
93 if (configuration.xinclude) parser.get_document()->process_xinclude(true); |
|
94 xmlpp::Element* root = parser.get_document()->get_root_node(); |
|
95 |
|
96 xmlpp::Node::PrefixNsMap ns; |
|
97 for (int i = 0; i < configuration.namespaceMappings.size(); i++) { |
|
98 std::string prefix = convertor.to_bytes(configuration.namespaceMappings[i]); |
|
99 std::string uri = convertor.to_bytes(configuration.namespaceMappings[++i]); |
|
100 ns[prefix] = uri; |
|
101 } |
|
102 |
|
103 for (const RelationConfiguration& r : configuration.relationConfigurations) { |
|
104 std::vector<relpipe::writer::AttributeMetadata> attributesMetadata; |
|
105 for (AttributeRecipe a : r.attributes) attributesMetadata.push_back(AttributeMetadata{a.name, a.type}); |
|
106 relpipe::writer::string_t name = r.nameIsXPath ? convertor.from_bytes(root->eval_to_string(convertor.to_bytes(r.relation), ns)) : r.relation; |
|
107 writer->startRelation(name, attributesMetadata, true); |
|
108 for (xmlpp::Node* n : root->find(convertor.to_bytes(r.xpath), ns)) { |
|
109 for (AttributeRecipe a : r.attributes) { |
|
110 // TODO: convert to bytes only once |
|
111 std::string attributeXpath = convertor.to_bytes(a.xpath); |
|
112 if (a.mode == Mode::STRING) { |
|
113 writer->writeAttribute(convertor.from_bytes(n->eval_to_string(attributeXpath, ns))); |
|
114 } else if (a.mode == Mode::BOOLEAN) { |
|
115 writer->writeAttribute(n->eval_to_boolean(attributeXpath, ns) ? L"true" : L"false"); |
|
116 } else if (a.mode == Mode::LINE_NUMBER) { |
|
117 xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); |
|
118 string_t line = attributeNodes.size() ? std::to_wstring(attributeNodes[0]->get_line()) : L""; // TODO: null |
|
119 writer->writeAttribute(line); |
|
120 } else if (a.mode == Mode::XPATH) { |
|
121 xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); |
|
122 string_t line = attributeNodes.size() ? convertor.from_bytes(attributeNodes[0]->get_path()) : L""; // TODO: null |
|
123 writer->writeAttribute(line); |
|
124 } else if (a.mode == Mode::RAW_XML) { |
|
125 writer->writeAttribute(toRawXML(n, a, ns)); |
|
126 } else { |
|
127 throw logic_error("Unsupported mode."); // should never happer, TODO: better relpipe exception |
|
128 } |
|
129 } |
|
130 } |
|
131 } |
|
132 } |
|
133 }; |
|
134 |
|
135 } |
|
136 } |
|
137 } |