author | František Kučera <franta-hg@frantovo.cz> |
Tue, 28 Jan 2020 14:26:39 +0100 | |
branch | v_0 |
changeset 67 | 0766d298eb1c |
parent 65 | 6944a03fb883 |
child 68 | 5d3d57d9c323 |
permissions | -rw-r--r-- |
65
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
/** |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
* Relational pipes |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
4 |
* |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
5 |
* This program is free software: you can redistribute it and/or modify |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
* it under the terms of the GNU General Public License as published by |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
7 |
* the Free Software Foundation, version 3 of the License. |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
8 |
* |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
9 |
* This program is distributed in the hope that it will be useful, |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
12 |
* GNU General Public License for more details. |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
13 |
* |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
14 |
* You should have received a copy of the GNU General Public License |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
*/ |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
17 |
|
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
18 |
#include "streamlet-common.h" |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
19 |
|
67
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
20 |
#include <unistd.h> |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
21 |
#include <regex> |
65
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
22 |
#include <libxml++-2.6/libxml++/libxml++.h> |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
23 |
|
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
24 |
class XPathStreamlet : public Streamlet { |
67
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
25 |
private: |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
26 |
xmlpp::Node::PrefixNsMap ns; |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
27 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
28 |
void findXmlnsInEnvironment() { |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
29 |
std::regex xmlnsEnvPattern("xmlns_(.*)=(.*)"); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
30 |
std::cmatch match; |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
31 |
for (char **env = environ; *env; env++) if (std::regex_match(*env, match, xmlnsEnvPattern)) ns[std::string(match[1])] = match[2]; |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
32 |
} |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
33 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
34 |
void findXmlnsInOptions() { |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
35 |
for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[convertor.to_bytes(o.nameMatch[1])] = convertor.to_bytes(o.value); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
36 |
for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[convertor.to_bytes(o.valueMatch[1])] = convertor.to_bytes(o.valueMatch[2]); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
37 |
} |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
38 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
39 |
class XPathAttribute { |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
40 |
public: |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
41 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
42 |
std::wstring name; |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
43 |
std::wstring xpath; |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
44 |
}; |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
45 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
46 |
std::vector<XPathAttribute> xpathAttributes; |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
47 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
48 |
protected: |
65
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
49 |
|
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
50 |
std::vector<AttributeMetadata> getOutputAttributesMetadata() override { |
67
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
51 |
findXmlnsInEnvironment(); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
52 |
findXmlnsInOptions(); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
53 |
|
65
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
54 |
std::vector<AttributeMetadata> oam; |
67
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
55 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
56 |
std::vector<Option> attributeOptions = getOptions(L"attribute"); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
57 |
for (int i = 0, limit = attributeOptions.size(); i < limit; i++) { |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
58 |
std::wstring alias = getAlias(i, attributeOptions[i].value); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
59 |
xpathAttributes.push_back({alias, attributeOptions[i].value}); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
60 |
oam.push_back({alias, STRING}); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
61 |
} |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
62 |
|
65
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
63 |
return oam; |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
64 |
} |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
65 |
|
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
66 |
std::vector<OutputAttribute> getOutputAttributes() override { |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
67 |
std::vector<OutputAttribute> oa; |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
68 |
|
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
69 |
try { |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
70 |
xmlpp::DomParser parser; |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
71 |
parser.parse_file(convertor.to_bytes(currentFile)); |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
72 |
xmlpp::Element* root = parser.get_document()->get_root_node(); |
67
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
73 |
|
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
74 |
for (XPathAttribute xpathAttribute : xpathAttributes) { |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
75 |
// TODO: support various modes like in XMLTableCommand |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
76 |
std::wstring result = convertor.from_bytes(root->eval_to_string(convertor.to_bytes(xpathAttribute.xpath), ns)); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
77 |
oa.push_back({result, false}); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
78 |
} |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
79 |
} catch (xmlpp::parse_error& e) { |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
80 |
for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({L"", true}); |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
81 |
// invalid XML → xmlpp::parse_error → just skip this file |
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
82 |
// invalid XPath → xmlpp::exception → failure |
65
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
83 |
} |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
84 |
|
67
0766d298eb1c
streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents:
65
diff
changeset
|
85 |
|
65
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
86 |
return oa; |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
87 |
} |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
88 |
}; |
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
89 |
|
6944a03fb883
streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
90 |
// NOTE(review): STREAMLET_RUN is not defined in this file; presumably it comes from
// streamlet-common.h and generates the code that runs XPathStreamlet — confirm.
STREAMLET_RUN(XPathStreamlet)