streamlet-examples/xpath.cpp
author František Kučera <franta-hg@frantovo.cz>
Thu, 30 Jan 2020 18:04:10 +0100
branchv_0
changeset 77 a680bcd946cd
parent 75 ecbf6504915c
child 78 5a63bf594f53
permissions -rw-r--r--
streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#include "streamlet-common.h"
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    20
#include <unistd.h>
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    21
#include <regex>
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <libxml++-2.6/libxml++/libxml++.h>
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
74
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    24
/**
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    25
 * This streamlet provides values from XML files.
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    26
 * It uses the XPath language to define which portion of the XML should be returned.
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    27
 * 
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    28
 * With no options it does not provide any attributes.
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    29
 * 
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    30
 * XPath expressions are passed as 'attribute' options.
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    31
 * e.g. --option 'attribute' 'name()' will return a single attribute with the name of the root node.
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    32
 * 
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    33
 * Attributes can be renamed using aliases: --option 'attribute' 'name()' --as 'name'. Otherwise the full XPath expression is used as a name.
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    34
 * The number of aliases should match the number of attributes (otherwise only the first attributes are renamed, because aliases are global, not relative to the --option).
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    35
 * 
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    36
 * Like relpipe-in-xmltable, this streamlet supports several modes:
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    37
 *  - string
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    38
 *  - boolean
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    39
 *  - raw-xml
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    40
 *  - line-number
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    41
 *  - xpath
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    42
 * 
77
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    43
 * The raw-xml mode provides a portion of the original XML defined by the XPath and can be further parametrized by options:
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    44
 *  - raw-xml-nodelist-wrapper-name
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    45
 *  - raw-xml-nodelist-wrapper-uri
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    46
 *  - raw-xml-nodelist-wrapper-prefix
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    47
 *  - raw-xml-attribute-wrapper-name
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    48
 *  - raw-xml-attribute-wrapper-uri
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    49
 *  - raw-xml-attribute-wrapper-prefix
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    50
 * 
74
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    51
 * TODO: more OOP, move to separate repository, proper CMake project, clean-up, stabilize API
a2aa84f310a5 streamlet examples: documentation
František Kučera <franta-hg@frantovo.cz>
parents: 70
diff changeset
    52
 */
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    53
class XPathStreamlet : public Streamlet {
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    54
private:
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    55
	xmlpp::Node::PrefixNsMap ns;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    56
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    57
	void findXmlnsInEnvironment() {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    58
		std::regex xmlnsEnvPattern("xmlns_(.*)=(.*)");
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    59
		std::cmatch match;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    60
		for (char **env = environ; *env; env++) if (std::regex_match(*env, match, xmlnsEnvPattern)) ns[std::string(match[1])] = match[2];
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    61
	}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    62
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    63
	void findXmlnsInOptions() {
75
ecbf6504915c streamlet examples: encapsulate and hide abstract class fields
František Kučera <franta-hg@frantovo.cz>
parents: 74
diff changeset
    64
		for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[toBytes(o.nameMatch[1])] = toBytes(o.value);
ecbf6504915c streamlet examples: encapsulate and hide abstract class fields
František Kučera <franta-hg@frantovo.cz>
parents: 74
diff changeset
    65
		for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[toBytes(o.valueMatch[1])] = toBytes(o.valueMatch[2]);
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    66
	}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    67
77
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    68
	std::wstring rawXmlNodeListWrapperName;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    69
	std::wstring rawXmlNodeListWrapperUri;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    70
	std::wstring rawXmlNodeListWrapperPrefix;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    71
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    72
	std::wstring rawXmlAttributeWrapperName = L"attribute";
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    73
	std::wstring rawXmlAttributeWrapperUri;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    74
	std::wstring rawXmlAttributeWrapperPrefix;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    75
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    76
	void findRawXmlOptions() {
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    77
		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-name")) rawXmlNodeListWrapperName = o.value;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    78
		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-uri")) rawXmlNodeListWrapperUri = o.value;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    79
		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-prefix")) rawXmlNodeListWrapperPrefix = o.value;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    80
		for (Option o : getOptions(L"raw-xml-attribute-wrapper-name")) rawXmlAttributeWrapperName = o.value;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    81
		for (Option o : getOptions(L"raw-xml-attribute-wrapper-uri")) rawXmlAttributeWrapperUri = o.value;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    82
		for (Option o : getOptions(L"raw-xml-attribute-wrapper-prefix")) rawXmlAttributeWrapperPrefix = o.value;
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    83
	}
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
    84
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
    85
	// Modes should share the logic of relpipe-in-xmltable
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
    86
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
    87
	/**
	 * Evaluation mode of a single XPath attribute.
	 * toMode() maps the textual mode names to these values and toType() maps them to the output attribute type.
	 */
	enum class Mode {
		/** Mode name "string"; output type STRING. */
		STRING,
		/** Mode name "boolean"; output type BOOLEAN. */
		BOOLEAN,
		// TODO: support also XML number, when we have a rational or decimal numbers in Relational pipes
		/** Mode name "raw-xml"; output type STRING. */
		RAW_XML,
		/** Mode name "line-number"; output type INTEGER. */
		LINE_NUMBER,
		/** Mode name "xpath"; output type STRING. */
		XPATH,
	};
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
    95
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
    96
	/**
	 * Translates a textual mode name to the Mode enumeration.
	 *
	 * @param modeName one of: string, boolean, raw-xml, line-number, xpath (exact match)
	 * @return the corresponding Mode
	 * @throws std::invalid_argument if the name is not one of the supported values
	 */
	Mode toMode(std::wstring modeName) {
		struct NamedMode {
			const wchar_t* name;
			Mode mode;
		};

		const NamedMode supportedModes[] = {
			{L"string", Mode::STRING},
			{L"boolean", Mode::BOOLEAN},
			{L"raw-xml", Mode::RAW_XML},
			{L"line-number", Mode::LINE_NUMBER},
			{L"xpath", Mode::XPATH},
		};

		for (const NamedMode& candidate : supportedModes) {
			if (modeName == candidate.name) return candidate.mode;
		}

		throw std::invalid_argument("Unsupported mode: " + toBytes(modeName));
	}
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   104
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   105
	/**
	 * Picks the output attribute type for the given mode.
	 *
	 * @param mode evaluation mode of the attribute
	 * @return BOOLEAN for Mode::BOOLEAN, INTEGER for Mode::LINE_NUMBER, STRING for every other mode
	 */
	std::wstring toType(Mode mode) {
		switch (mode) {
			case Mode::BOOLEAN:
				return BOOLEAN;
			case Mode::LINE_NUMBER:
				return INTEGER;
			default:
				return STRING;
		}
	}
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   110
77
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   111
	/**
	 * Strips the serialization boilerplate from an XML string.
	 *
	 * Removes the leading XML declaration (version 1.0, encoding UTF-8) together with the line break
	 * that follows it, and a single line break at the very end of the text.
	 *
	 * @param rawXML serialized XML document
	 * @return the same text without the declaration and without the final line break
	 */
	std::wstring formatRawXML(std::wstring rawXML) {
		// First alternative: the declaration at the beginning; second alternative: the final line break.
		const std::wregex boilerplate(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$");
		const std::wstring nothing;
		std::wstring stripped = std::regex_replace(rawXML, boilerplate, nothing);
		return stripped;
	}
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   115
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   116
	/**
	 * Imports a found node under an existing parent node.
	 *
	 * An attribute node is not imported directly: a new child element is created under the parent
	 * (configured by the raw-xml-attribute-wrapper-* fields) and the attribute is imported into it.
	 * Any other node is imported recursively straight under the parent.
	 *
	 * @param parent node of the output document that receives the import
	 * @param child node found by the XPath expression
	 */
	void importNode(xmlpp::Node* parent, xmlpp::Node* child) {
		const bool isAttribute = dynamic_cast<xmlpp::AttributeNode*> (child) != nullptr;

		if (!isAttribute) {
			parent->import_node(child, true);
			return;
		}

		auto* wrapper = parent->add_child_with_new_ns(
				toBytes(rawXmlAttributeWrapperName),
				toBytes(rawXmlAttributeWrapperUri),
				toBytes(rawXmlAttributeWrapperPrefix));
		wrapper->import_node(child);
	}
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   123
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   124
	/**
	 * Imports a found node as the root of an output document.
	 *
	 * An attribute node is not used as the root directly: a root element is created
	 * (configured by the raw-xml-attribute-wrapper-* fields) and the attribute is imported into it.
	 * Any other node becomes the root node through a recursive import.
	 *
	 * @param document output document that receives the root node
	 * @param child node found by the XPath expression
	 */
	void importNode(xmlpp::Document* document, xmlpp::Node* child) {
		const bool isAttribute = dynamic_cast<xmlpp::AttributeNode*> (child) != nullptr;

		if (!isAttribute) {
			document->create_root_node_by_import(child, true);
			return;
		}

		auto* wrapper = document->create_root_node(
				toBytes(rawXmlAttributeWrapperName),
				toBytes(rawXmlAttributeWrapperUri),
				toBytes(rawXmlAttributeWrapperPrefix));
		wrapper->import_node(child);
	}
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   131
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   132
	/**
	 * Evaluates an XPath expression and serializes the found nodes as an XML fragment.
	 *
	 * If a node list wrapper name is configured, a root element of that name is created and all found nodes
	 * are imported into it (possibly none). Without a wrapper, exactly one found node becomes the root
	 * of the result and no found node yields an empty string.
	 *
	 * @param parent context node for the XPath evaluation
	 * @param xpath XPath expression
	 * @param ns namespace prefix → URI mappings used in the expression
	 * @return serialized XML post-processed by formatRawXML(), or an empty string
	 * @throws std::invalid_argument if several nodes were found and no node list wrapper is configured
	 */
	std::wstring toRawXML(xmlpp::Node* parent, std::string xpath, xmlpp::Node::PrefixNsMap ns) {
		xmlpp::Document result;
		xmlpp::NodeSet found = parent->find(xpath, ns);

		if (rawXmlNodeListWrapperName.empty()) {
			// No wrapper: the document can hold only a single root node.
			if (found.empty()) return L"";
			if (found.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception
			importNode(&result, found[0]);
		} else {
			result.create_root_node(
					toBytes(rawXmlNodeListWrapperName),
					toBytes(rawXmlNodeListWrapperUri),
					toBytes(rawXmlNodeListWrapperPrefix));
			for (xmlpp::Node* item : found) {
				importNode(result.get_root_node(), item);
			}
		}

		return formatRawXML(fromBytes(result.write_to_string()));
	}
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   149
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   150
	class XPathAttribute {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   151
	public:
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   152
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   153
		std::wstring name;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   154
		std::wstring xpath;
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   155
		Mode mode = Mode::STRING;
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   156
	};
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   157
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   158
	std::vector<XPathAttribute> xpathAttributes;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   159
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   160
protected:
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   161
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   162
	std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   163
		findXmlnsInEnvironment();
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   164
		findXmlnsInOptions();
77
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   165
		findRawXmlOptions();
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   166
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   167
		std::vector<AttributeMetadata> oam;
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   168
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   169
		std::vector<Option> modeOptions = getOptions(L"mode");
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   170
		std::vector<Option> attributeOptions = getOptions(L"attribute");
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   171
		for (int i = 0, limit = attributeOptions.size(); i < limit; i++) {
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   172
			Mode mode = i < modeOptions.size() ? toMode(modeOptions[i].value) : Mode::STRING;
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   173
			std::wstring alias = getAlias(i, attributeOptions[i].value);
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   174
			xpathAttributes.push_back({alias, attributeOptions[i].value, mode});
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   175
			oam.push_back({alias, toType(mode)});
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   176
		}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   177
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   178
		return oam;
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   179
	}
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   180
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   181
	std::vector<OutputAttribute> getOutputAttributes() override {
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   182
		std::vector<OutputAttribute> oa;
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   183
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   184
		try {
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   185
			xmlpp::DomParser parser;
75
ecbf6504915c streamlet examples: encapsulate and hide abstract class fields
František Kučera <franta-hg@frantovo.cz>
parents: 74
diff changeset
   186
			parser.parse_file(toBytes(getCurrentFile()));
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   187
			xmlpp::Element* root = parser.get_document()->get_root_node();
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   188
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   189
			for (XPathAttribute xpathAttribute : xpathAttributes) {
75
ecbf6504915c streamlet examples: encapsulate and hide abstract class fields
František Kučera <franta-hg@frantovo.cz>
parents: 74
diff changeset
   190
				std::string xpath = toBytes(xpathAttribute.xpath);
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   191
				std::wstring result;
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   192
				bool isNull = false;
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   193
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   194
				if (xpathAttribute.mode == Mode::STRING) {
75
ecbf6504915c streamlet examples: encapsulate and hide abstract class fields
František Kučera <franta-hg@frantovo.cz>
parents: 74
diff changeset
   195
					result = fromBytes(root->eval_to_string(xpath, ns));
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   196
				} else if (xpathAttribute.mode == Mode::BOOLEAN) {
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   197
					result = root->eval_to_boolean(xpath, ns) ? L"true" : L"false";
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   198
				} else if (xpathAttribute.mode == Mode::LINE_NUMBER) {
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   199
					xmlpp::NodeSet attributeNodes = root->find(xpath, ns);
70
018e2609f5bb streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
František Kučera <franta-hg@frantovo.cz>
parents: 68
diff changeset
   200
					if (attributeNodes.size()) result = std::to_wstring(attributeNodes[0]->get_line());
018e2609f5bb streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
František Kučera <franta-hg@frantovo.cz>
parents: 68
diff changeset
   201
					else isNull = true;
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   202
				} else if (xpathAttribute.mode == Mode::XPATH) {
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   203
					xmlpp::NodeSet attributeNodes = root->find(xpath, ns);
75
ecbf6504915c streamlet examples: encapsulate and hide abstract class fields
František Kučera <franta-hg@frantovo.cz>
parents: 74
diff changeset
   204
					if (attributeNodes.size()) result = fromBytes(attributeNodes[0]->get_path());
70
018e2609f5bb streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
František Kučera <franta-hg@frantovo.cz>
parents: 68
diff changeset
   205
					else isNull = true;
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   206
				} else if (xpathAttribute.mode == Mode::RAW_XML) {
77
a680bcd946cd streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
František Kučera <franta-hg@frantovo.cz>
parents: 75
diff changeset
   207
					result = toRawXML(root, xpath, ns);
68
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   208
				} else {
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   209
					throw std::logic_error("Unsupported mode."); // should never happer
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   210
				}
5d3d57d9c323 streamlet examples: xpath: support multiple modes (string, boolean, line-number, xpath), TODO: raw-xml
František Kučera <franta-hg@frantovo.cz>
parents: 67
diff changeset
   211
70
018e2609f5bb streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
František Kučera <franta-hg@frantovo.cz>
parents: 68
diff changeset
   212
				oa.push_back({result, isNull});
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   213
			}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   214
		} catch (xmlpp::parse_error& e) {
70
018e2609f5bb streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
František Kučera <franta-hg@frantovo.cz>
parents: 68
diff changeset
   215
			for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({L"", true});
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   216
			// invalid XML → xmlpp::parse_error → just skip this file
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   217
			// invalid XPath → xmlpp::exception → failure
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   218
		}
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   219
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
   220
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   221
		return oa;
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   222
	}
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   223
};
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   224
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   225
// NOTE(review): STREAMLET_RUN is a macro defined outside this file; presumably it generates the program entry point that runs XPathStreamlet – confirm.
STREAMLET_RUN(XPathStreamlet)