author | František Kučera <franta-hg@frantovo.cz> |
Tue, 23 Nov 2021 20:38:05 +0100 | |
branch | v_0 |
changeset 3 | b30965489f42 |
parent 0 | 2f783f0573fa |
permissions | -rw-r--r-- |
0 | 1 |
/** |
2 |
* Relational pipes |
|
3 |
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 |
* |
|
5 |
* This program is free software: you can redistribute it and/or modify |
|
6 |
* it under the terms of the GNU General Public License as published by |
|
7 |
* the Free Software Foundation, version 3 of the License. |
|
8 |
* |
|
9 |
* This program is distributed in the hope that it will be useful, |
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
* GNU General Public License for more details. |
|
13 |
* |
|
14 |
* You should have received a copy of the GNU General Public License |
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 |
*/ |
|
17 |
#pragma once |
|
18 |
||
19 |
#include <cstdlib> |
|
20 |
#include <iostream> |
|
21 |
#include <string> |
|
22 |
#include <sstream> |
|
23 |
#include <vector> |
|
24 |
#include <algorithm> |
|
25 |
#include <exception> |
|
26 |
#include <regex> |
|
27 |
||
28 |
#include <libxml++-2.6/libxml++/libxml++.h> |
|
29 |
||
30 |
#include <relpipe/writer/typedefs.h> |
|
31 |
||
32 |
#include "Configuration.h" |
|
33 |
#include "XMLDocumentConstructor.h" |
|
34 |
||
35 |
namespace relpipe { |
|
36 |
namespace in { |
|
37 |
namespace xmltable { |
|
38 |
||
39 |
using namespace relpipe::writer; |
|
40 |
||
41 |
class XMLCommand { |
|
42 |
private: |
|
43 |
std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings. |
|
44 |
||
45 |
string_t formatRawXML(string_t rawXML) { |
|
3
b30965489f42
fix raw-xml formatting (remove XML declaration)
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
46 |
std::wregex pattern(L"^(<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\\s*)+|\n$"); |
b30965489f42
fix raw-xml formatting (remove XML declaration)
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
47 |
// libxml sometimes returns doubled XML declaration (probably a bug), see: |
b30965489f42
fix raw-xml formatting (remove XML declaration)
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
48 |
// --relation ini --records '/' --attribute 'xml' string '.' --mode raw-xml # (but not for --records '//*') |
b30965489f42
fix raw-xml formatting (remove XML declaration)
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
49 |
// so we remove all of them and also trailing line ends (if any). |
0 | 50 |
return std::regex_replace(rawXML, pattern, L""); |
51 |
} |
|
52 |
||
53 |
void importNode(xmlpp::Node* parent, xmlpp::Node* child, AttributeRecipe attributeRecipe) { |
|
54 |
if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns( |
|
55 |
convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), |
|
56 |
convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), |
|
57 |
convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); |
|
58 |
else parent->import_node(child, true); |
|
59 |
} |
|
60 |
||
61 |
void importNode(xmlpp::Document* document, xmlpp::Node* child, AttributeRecipe attributeRecipe) { |
|
62 |
if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node( |
|
63 |
convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), |
|
64 |
convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), |
|
65 |
convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); |
|
66 |
else document->create_root_node_by_import(child, true); |
|
67 |
} |
|
68 |
||
69 |
string_t toRawXML(xmlpp::Node* parent, AttributeRecipe attributeRecipe, xmlpp::Node::PrefixNsMap ns) { |
|
70 |
xmlpp::Document d; |
|
71 |
xmlpp::NodeSet nodes = parent->find(convertor.to_bytes(attributeRecipe.xpath), ns); |
|
72 |
||
73 |
if (attributeRecipe.rawXmlNodeListWrapper.name.size()) { |
|
74 |
d.create_root_node( |
|
75 |
convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.name), |
|
76 |
convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.uri), |
|
77 |
convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.prefix)); |
|
78 |
for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node, attributeRecipe); |
|
79 |
} else { |
|
80 |
if (nodes.size() == 1) importNode(&d, nodes[0], attributeRecipe); |
|
81 |
else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception |
|
82 |
else return L""; // TODO: null |
|
83 |
} |
|
84 |
return formatRawXML(convertor.from_bytes(d.write_to_string())); |
|
85 |
} |
|
86 |
||
87 |
public: |
|
88 |
||
89 |
void process(std::istream& input, std::ostream& output, Configuration& configuration) { |
|
90 |
std::shared_ptr<RelationalWriter> writer(Factory::create(output)); |
|
91 |
||
92 |
xmlpp::DomParser parser; |
|
93 |
XMLDocumentConstructor documentConstructor(&input, &parser); |
|
94 |
for (ParserOptionRecipe o : configuration.parserOptions) documentConstructor.setOption(convertor.to_bytes(o.uri), convertor.to_bytes(o.value)); |
|
95 |
documentConstructor.process(); |
|
96 |
if (configuration.xinclude) parser.get_document()->process_xinclude(true); |
|
97 |
xmlpp::Element* root = parser.get_document()->get_root_node(); |
|
98 |
||
99 |
xmlpp::Node::PrefixNsMap ns; |
|
100 |
for (int i = 0; i < configuration.namespaceMappings.size(); i++) { |
|
101 |
std::string prefix = convertor.to_bytes(configuration.namespaceMappings[i]); |
|
102 |
std::string uri = convertor.to_bytes(configuration.namespaceMappings[++i]); |
|
103 |
ns[prefix] = uri; |
|
104 |
} |
|
105 |
||
106 |
for (const RelationConfiguration& r : configuration.relationConfigurations) { |
|
107 |
std::vector<relpipe::writer::AttributeMetadata> attributesMetadata; |
|
108 |
for (AttributeRecipe a : r.attributes) attributesMetadata.push_back(AttributeMetadata{a.name, a.type}); |
|
109 |
relpipe::writer::string_t name = r.nameIsXPath ? convertor.from_bytes(root->eval_to_string(convertor.to_bytes(r.relation), ns)) : r.relation; |
|
110 |
writer->startRelation(name, attributesMetadata, true); |
|
111 |
for (xmlpp::Node* n : root->find(convertor.to_bytes(r.xpath), ns)) { |
|
112 |
for (AttributeRecipe a : r.attributes) { |
|
113 |
// TODO: convert to bytes only once |
|
114 |
std::string attributeXpath = convertor.to_bytes(a.xpath); |
|
115 |
if (a.mode == Mode::STRING) { |
|
116 |
writer->writeAttribute(convertor.from_bytes(n->eval_to_string(attributeXpath, ns))); |
|
117 |
} else if (a.mode == Mode::BOOLEAN) { |
|
118 |
writer->writeAttribute(n->eval_to_boolean(attributeXpath, ns) ? L"true" : L"false"); |
|
119 |
} else if (a.mode == Mode::LINE_NUMBER) { |
|
120 |
xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); |
|
121 |
string_t line = attributeNodes.size() ? std::to_wstring(attributeNodes[0]->get_line()) : L""; // TODO: null |
|
122 |
writer->writeAttribute(line); |
|
123 |
} else if (a.mode == Mode::XPATH) { |
|
124 |
xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns); |
|
125 |
string_t line = attributeNodes.size() ? convertor.from_bytes(attributeNodes[0]->get_path()) : L""; // TODO: null |
|
126 |
writer->writeAttribute(line); |
|
127 |
} else if (a.mode == Mode::RAW_XML) { |
|
128 |
writer->writeAttribute(toRawXML(n, a, ns)); |
|
129 |
} else { |
|
130 |
throw logic_error("Unsupported mode."); // should never happer, TODO: better relpipe exception |
|
131 |
} |
|
132 |
} |
|
133 |
} |
|
134 |
} |
|
135 |
} |
|
136 |
}; |
|
137 |
||
138 |
} |
|
139 |
} |
|
140 |
} |