streamlet-examples/xpath.cpp
author František Kučera <franta-hg@frantovo.cz>
Tue, 28 Jan 2020 14:26:39 +0100
branch v_0
changeset 67 0766d298eb1c
parent 65 6944a03fb883
child 68 5d3d57d9c323
permissions -rw-r--r--
streamlet examples: xpath

/**
 * Relational pipes
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "streamlet-common.h"

#include <unistd.h>
#include <regex>
#include <libxml++-2.6/libxml++/libxml++.h>

class XPathStreamlet : public Streamlet {
private:
	xmlpp::Node::PrefixNsMap ns;

	void findXmlnsInEnvironment() {
		std::regex xmlnsEnvPattern("xmlns_(.*)=(.*)");
		std::cmatch match;
		for (char **env = environ; *env; env++) if (std::regex_match(*env, match, xmlnsEnvPattern)) ns[std::string(match[1])] = match[2];
	}

	void findXmlnsInOptions() {
		for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[convertor.to_bytes(o.nameMatch[1])] = convertor.to_bytes(o.value);
		for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[convertor.to_bytes(o.valueMatch[1])] = convertor.to_bytes(o.valueMatch[2]);
	}

	class XPathAttribute {
	public:

		std::wstring name;
		std::wstring xpath;
	};

	std::vector<XPathAttribute> xpathAttributes;

protected:

	std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
		findXmlnsInEnvironment();
		findXmlnsInOptions();

		std::vector<AttributeMetadata> oam;

		std::vector<Option> attributeOptions = getOptions(L"attribute");
		for (int i = 0, limit = attributeOptions.size(); i < limit; i++) {
			std::wstring alias = getAlias(i, attributeOptions[i].value);
			xpathAttributes.push_back({alias, attributeOptions[i].value});
			oam.push_back({alias, STRING});
		}

		return oam;
	}

	std::vector<OutputAttribute> getOutputAttributes() override {
		std::vector<OutputAttribute> oa;

		try {
			xmlpp::DomParser parser;
			parser.parse_file(convertor.to_bytes(currentFile));
			xmlpp::Element* root = parser.get_document()->get_root_node();

			for (XPathAttribute xpathAttribute : xpathAttributes) {
				// TODO: support various modes like in XMLTableCommand
				std::wstring result = convertor.from_bytes(root->eval_to_string(convertor.to_bytes(xpathAttribute.xpath), ns));
				oa.push_back({result, false});
			}
		} catch (xmlpp::parse_error& e) {
			for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({L"", true});
			// invalid XML → xmlpp::parse_error → just skip this file
			// invalid XPath → xmlpp::exception → failure
		}


		return oa;
	}
};

// Hands the XPathStreamlet class to the STREAMLET_RUN macro.
// NOTE(review): the macro is not defined in this file – presumably it comes from streamlet-common.h
// and generates the program entry point that runs the given streamlet; confirm there.
STREAMLET_RUN(XPathStreamlet)