streamlet-examples/xpath.cpp
author František Kučera <franta-hg@frantovo.cz>
Tue, 28 Jan 2020 14:26:39 +0100
branchv_0
changeset 67 0766d298eb1c
parent 65 6944a03fb883
child 68 5d3d57d9c323
permissions -rw-r--r--
streamlet examples: xpath
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#include "streamlet-common.h"
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    20
#include <unistd.h>
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    21
#include <regex>
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <libxml++-2.6/libxml++/libxml++.h>
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
class XPathStreamlet : public Streamlet {
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    25
private:
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    26
	xmlpp::Node::PrefixNsMap ns;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    27
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    28
	void findXmlnsInEnvironment() {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    29
		std::regex xmlnsEnvPattern("xmlns_(.*)=(.*)");
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    30
		std::cmatch match;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    31
		for (char **env = environ; *env; env++) if (std::regex_match(*env, match, xmlnsEnvPattern)) ns[std::string(match[1])] = match[2];
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    32
	}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    33
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    34
	void findXmlnsInOptions() {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    35
		for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[convertor.to_bytes(o.nameMatch[1])] = convertor.to_bytes(o.value);
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    36
		for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[convertor.to_bytes(o.valueMatch[1])] = convertor.to_bytes(o.valueMatch[2]);
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    37
	}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    38
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    39
	class XPathAttribute {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    40
	public:
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    41
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    42
		std::wstring name;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    43
		std::wstring xpath;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    44
	};
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    45
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    46
	std::vector<XPathAttribute> xpathAttributes;
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    47
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    48
protected:
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    49
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    50
	std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    51
		findXmlnsInEnvironment();
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    52
		findXmlnsInOptions();
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    53
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    54
		std::vector<AttributeMetadata> oam;
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    55
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    56
		std::vector<Option> attributeOptions = getOptions(L"attribute");
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    57
		for (int i = 0, limit = attributeOptions.size(); i < limit; i++) {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    58
			std::wstring alias = getAlias(i, attributeOptions[i].value);
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    59
			xpathAttributes.push_back({alias, attributeOptions[i].value});
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    60
			oam.push_back({alias, STRING});
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    61
		}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    62
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    63
		return oam;
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    64
	}
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    66
	std::vector<OutputAttribute> getOutputAttributes() override {
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    67
		std::vector<OutputAttribute> oa;
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    68
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    69
		try {
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    70
			xmlpp::DomParser parser;
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    71
			parser.parse_file(convertor.to_bytes(currentFile));
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    72
			xmlpp::Element* root = parser.get_document()->get_root_node();
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    73
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    74
			for (XPathAttribute xpathAttribute : xpathAttributes) {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    75
				// TODO: support various modes like in XMLTableCommand
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    76
				std::wstring result = convertor.from_bytes(root->eval_to_string(convertor.to_bytes(xpathAttribute.xpath), ns));
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    77
				oa.push_back({result, false});
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    78
			}
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    79
		} catch (xmlpp::parse_error& e) {
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    80
			for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({L"", true});
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    81
			// invalid XML → xmlpp::parse_error → just skip this file
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    82
			// invalid XPath → xmlpp::exception → failure
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    83
		}
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    84
67
0766d298eb1c streamlet examples: xpath
František Kučera <franta-hg@frantovo.cz>
parents: 65
diff changeset
    85
65
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    86
		return oa;
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    87
	}
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    88
};
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    89
6944a03fb883 streamlet examples: xpath – parse and validate XML document
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    90
// Hands the XPathStreamlet class to the STREAMLET_RUN macro. The macro is not defined in this file
// (presumably it comes from streamlet-common.h and generates the program entry point — TODO confirm).
STREAMLET_RUN(XPathStreamlet)