1 /** |
|
2 * Relational pipes |
|
3 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 * |
|
5 * This program is free software: you can redistribute it and/or modify |
|
6 * it under the terms of the GNU General Public License as published by |
|
7 * the Free Software Foundation, version 3 of the License. |
|
8 * |
|
9 * This program is distributed in the hope that it will be useful, |
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 * GNU General Public License for more details. |
|
13 * |
|
14 * You should have received a copy of the GNU General Public License |
|
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 */ |
|
17 #pragma once |
|
18 |
|
19 #include <cstdlib> |
|
20 #include <iostream> |
|
21 #include <string> |
|
22 #include <sstream> |
|
23 #include <vector> |
|
24 #include <algorithm> |
|
25 #include <exception> |
|
26 #include <regex> |
|
27 |
|
28 #include <libxml++-2.6/libxml++/libxml++.h> |
|
29 |
|
30 #include <relpipe/writer/typedefs.h> |
|
31 |
|
32 #include "Configuration.h" |
|
33 #include "XMLDocumentConstructor.h" |
|
34 |
|
35 namespace relpipe { |
|
36 namespace in { |
|
37 namespace xmltable { |
|
38 |
|
39 using namespace relpipe::writer; |
|
40 |
|
41 class XMLCommand { |
|
42 private: |
|
43 std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings. |
|
44 |
|
45 string_t formatRawXML(string_t rawXML) { |
|
46 std::wregex pattern(L"^(<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\\s*)+|\n$"); |
|
47 // libxml sometimes returns doubled XML declaration (probably a bug), see: |
|
48 // --relation ini --records '/' --attribute 'xml' string '.' --mode raw-xml # (but not for --records '//*') |
|
49 // so we remove all of them and also trailing line ends (if any). |
|
50 return std::regex_replace(rawXML, pattern, L""); |
|
51 } |
|
52 |
|
53 void importNode(xmlpp::Node* parent, xmlpp::Node* child, AttributeRecipe attributeRecipe) { |
|
54 if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns( |
|
55 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), |
|
56 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), |
|
57 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); |
|
58 else parent->import_node(child, true); |
|
59 } |
|
60 |
|
61 void importNode(xmlpp::Document* document, xmlpp::Node* child, AttributeRecipe attributeRecipe) { |
|
62 if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node( |
|
63 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), |
|
64 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), |
|
65 convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); |
|
66 else document->create_root_node_by_import(child, true); |
|
67 } |
|
68 |
|
69 string_t toRawXML(xmlpp::Node* parent, AttributeRecipe attributeRecipe, xmlpp::Node::PrefixNsMap ns) { |
|
70 xmlpp::Document d; |
|
71 xmlpp::NodeSet nodes = parent->find(convertor.to_bytes(attributeRecipe.xpath), ns); |
|
72 |
|
73 if (attributeRecipe.rawXmlNodeListWrapper.name.size()) { |
|
74 d.create_root_node( |
|
75 convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.name), |
|
76 convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.uri), |
|
77 convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.prefix)); |
|
78 for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node, attributeRecipe); |
|
79 } else { |
|
80 if (nodes.size() == 1) importNode(&d, nodes[0], attributeRecipe); |
|
81 else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception |
|
82 else return L""; // TODO: null |
|
83 } |
|
84 return formatRawXML(convertor.from_bytes(d.write_to_string())); |
|
85 } |
|
86 |
|
87 public: |
|
88 |
|
89 void process(std::istream& input, std::ostream& output, Configuration& configuration) { |
|
90 std::shared_ptr<RelationalWriter> writer(Factory::create(output)); |
|
91 |
|
92 xmlpp::DomParser parser; |
|
93 XMLDocumentConstructor documentConstructor(&input, &parser); |
|
94 for (ParserOptionRecipe o : configuration.parserOptions) documentConstructor.setOption(convertor.to_bytes(o.uri), convertor.to_bytes(o.value)); |
|
95 documentConstructor.process(); |
|
96 if (configuration.xinclude) parser.get_document()->process_xinclude(true); |
|
97 xmlpp::Element* root = parser.get_document()->get_root_node(); |
|
98 |
|
99 xmlpp::Node::PrefixNsMap ns; |
|
100 for (int i = 0; i < configuration.namespaceMappings.size(); i++) { |
|
101 std::string prefix = convertor.to_bytes(configuration.namespaceMappings[i]); |
|
102 std::string uri = convertor.to_bytes(configuration.namespaceMappings[++i]); |
|
103 ns[prefix] = uri; |
|
104 } |
|
105 |
|
106 for (const RelationConfiguration& r : configuration.relationConfigurations) { |
|
107 std::vector<relpipe::writer::AttributeMetadata> attributesMetadata; |
|
108 for (AttributeRecipe a : r.attributes) attributesMetadata.push_back(AttributeMetadata{a.name, a.type}); |
|
109 relpipe::writer::string_t name = r.nameIsXPath ? convertor.from_bytes(root->eval_to_string(convertor.to_bytes(r.relation), ns)) : r.relation; |
|
110 writer->startRelation(name, attributesMetadata, true); |
|
111 for (xmlpp::Node* n : root->find(convertor.to_bytes(r.xpath), ns)) { |
|
112 for (AttributeRecipe a : r.attributes) { |
|
113 // TODO: convert to bytes only once |
|
114 std::string attributeXpath = convertor.to_bytes(a.xpath); |
|
115 if (a.mode == Mode::STRING) { |
|
116 writer->writeAttribute(convertor.from_bytes(n->eval_to_string(attributeXpath, ns))); |
|
117 } else if (a.mode == Mode::BOOLEAN) { |
|
118 writer->writeAttribute(n->eval_to_boolean(attributeXpath, ns) ? L"true" : L"false"); |
|
119 } else if (a.mode == Mode::LINE_NUMBER) { |
|
120 xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); |
|
121 string_t line = attributeNodes.size() ? std::to_wstring(attributeNodes[0]->get_line()) : L""; // TODO: null |
|
122 writer->writeAttribute(line); |
|
123 } else if (a.mode == Mode::XPATH) { |
|
124 xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); |
|
125 string_t line = attributeNodes.size() ? convertor.from_bytes(attributeNodes[0]->get_path()) : L""; // TODO: null |
|
126 writer->writeAttribute(line); |
|
127 } else if (a.mode == Mode::RAW_XML) { |
|
128 writer->writeAttribute(toRawXML(n, a, ns)); |
|
129 } else { |
|
130 throw logic_error("Unsupported mode."); // should never happer, TODO: better relpipe exception |
|
131 } |
|
132 } |
|
133 } |
|
134 } |
|
135 } |
|
136 }; |
|
137 |
|
138 } |
|
139 } |
|
140 } |
|