streamlet examples: xpath v_0
author František Kučera <franta-hg@frantovo.cz>
Tue, 28 Jan 2020 14:26:39 +0100
branch v_0
changeset 67 0766d298eb1c
parent 66 8a8b6434e4bb
child 68 5d3d57d9c323
streamlet examples: xpath
src/SubProcess.h
streamlet-examples/streamlet-common.h
streamlet-examples/xpath.cpp
--- a/src/SubProcess.h	Mon Jan 27 00:43:39 2020 +0100
+++ b/src/SubProcess.h	Tue Jan 28 14:26:39 2020 +0100
@@ -57,7 +57,7 @@
 			std::wstringstream s;
 			s << L"Message(code: " << code << L", parameters: ";
 			for (int i = 0; i < parameters.size(); i++) {
-				if (i < parameters.size() - 1) s << parameters[i] << L",";
+				if (i < parameters.size() - 1) s << parameters[i] << L", ";
 				else s << parameters[i];
 			}
 			s << L")";
--- a/streamlet-examples/streamlet-common.h	Mon Jan 27 00:43:39 2020 +0100
+++ b/streamlet-examples/streamlet-common.h	Tue Jan 28 14:26:39 2020 +0100
@@ -23,6 +23,7 @@
 #include <sstream>
 #include <codecvt>
 #include <locale>
+#include <regex>
 
 #include "../src/StreamletMsg.h"
 
@@ -36,7 +37,7 @@
 using S = relpipe::in::filesystem::StreamletMsg;
 
 class Streamlet {
-private:
+protected:
 
 	class Message {
 	public:
@@ -59,6 +60,8 @@
 		}
 	};
 
+private:
+
 	static const char SEPARATOR = '\0';
 
 	int readInt() {
@@ -93,6 +96,14 @@
 		return m;
 	}
 
+	/**
+	 * The std::wsmatch only references the original string, so its sub-matches (whole match first, then each capture group)
+	 * are appended to destination as independent copies that stay valid after the matched string goes away.
+	 */
+	void copyMatches(const std::wsmatch& source, std::vector<std::wstring>& destination) {
+		for (const auto& s : source) destination.emplace_back(s.str());
+	}
+
 	void processMessages() {
 		while (true) {
 			Message m = read();
@@ -128,6 +139,11 @@
 	public:
 		std::wstring name;
 		std::wstring value;
+		std::vector<std::wstring> nameMatch;
+		std::vector<std::wstring> valueMatch;
+
+		Option(std::wstring name, std::wstring value) : name(name), value(value) {
+		}
 	};
 
 	std::vector<std::wstring> versionsSupported;
@@ -201,6 +217,38 @@
 		else return defaultValue;
 	}
 
+	virtual std::vector<Option> getOptions(std::wstring name) { // returns copies of all options whose name equals the given one
+		std::vector<Option> result;
+		for (Option o : options) if (o.name == name) result.push_back(o);
+		return result;
+	}
+
+	virtual std::vector<Option> getOptions(std::wregex namePattern) { // returns copies of the options whose whole name matches namePattern
+		std::vector<Option> result;
+		std::wsmatch nameMatch;
+		for (Option o : options) if (std::regex_match(o.name, nameMatch, namePattern)) { // o is a per-iteration copy, so filling o.nameMatch only affects the returned option
+				copyMatches(nameMatch, o.nameMatch); // nameMatch refers into o.name, so copy it while o is still alive
+				result.push_back(o);
+			}
+		return result;
+	}
+
+	virtual std::vector<Option> getOptions(std::wregex namePattern, std::wregex valuePattern) { // returns copies of the options whose name AND value both fully match their patterns
+		// TODO: support multiple modes: 
+		//   a) throw an exception if valuePattern does not match
+		//   b) return option even if valuePattern does not match (valueMatch will be empty)
+		//   c) skip options with value not matching (current behavior)
+		std::wsmatch nameMatch;
+		std::wsmatch valueMatch;
+		std::vector<Option> result;
+		for (Option o : options) if (std::regex_match(o.name, nameMatch, namePattern) && std::regex_match(o.value, valueMatch, valuePattern)) { // && short-circuits: the value is tested only when the name matched
+				copyMatches(nameMatch, o.nameMatch);
+				copyMatches(valueMatch, o.valueMatch);
+				result.push_back(o);
+			}
+		return result;
+	}
+
 	virtual std::vector<AttributeMetadata> getOutputAttributesMetadata() = 0;
 	virtual std::vector<OutputAttribute> getOutputAttributes() = 0;
 
@@ -213,7 +261,11 @@
 		try {
 			processMessages();
 			return 0;
+		} catch (std::exception& e) {
+			write({S::STREAMLET_ERROR, L"xxxx", L"Exception in streamlet: " + convertor.from_bytes(e.what())}); // FIXME: correct error codes
+			return 1;
 		} catch (...) {
+			write({S::STREAMLET_ERROR, L"xxxx", L"Unknown exception in streamlet."}); // FIXME: correct error codes
 			return 1;
 		}
 	}
@@ -225,6 +277,7 @@
 
 #define STREAMLET_RUN(clazz) \
 int main(int argc, char** argv) { \
+	setlocale(LC_ALL, ""); \
 	clazz s; \
 	return s.run(); \
 }
--- a/streamlet-examples/xpath.cpp	Mon Jan 27 00:43:39 2020 +0100
+++ b/streamlet-examples/xpath.cpp	Tue Jan 28 14:26:39 2020 +0100
@@ -17,13 +17,49 @@
 
 #include "streamlet-common.h"
 
+#include <unistd.h>
+#include <regex>
 #include <libxml++-2.6/libxml++/libxml++.h>
 
 class XPathStreamlet : public Streamlet {
+private:
+	xmlpp::Node::PrefixNsMap ns;
+
+	void findXmlnsInEnvironment() { // registers in ns every environment entry of the form xmlns_PREFIX=URI
+		std::regex xmlnsEnvPattern("xmlns_([^=]*)=(.*)"); // the prefix ends at the FIRST '=', so the URI itself may contain '=' (e.g. a query string)
+		std::cmatch match;
+		for (char **env = environ; *env; env++) if (std::regex_match(*env, match, xmlnsEnvPattern)) ns[std::string(match[1])] = match[2];
+	}
+
+	void findXmlnsInOptions() { // registers namespace prefixes from the streamlet options in ns; two spellings are accepted, one per loop
+		for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[convertor.to_bytes(o.nameMatch[1])] = convertor.to_bytes(o.value); // option name "xmlns:PREFIX" or "xmlns_PREFIX", option value is the namespace URI
+		for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[convertor.to_bytes(o.valueMatch[1])] = convertor.to_bytes(o.valueMatch[2]); // option name exactly "xmlns", option value "PREFIX:URI" split at the first ':'
+	}
+
+	class XPathAttribute { // one output attribute backed by an XPath expression
+	public:
+
+		std::wstring name; // filled with the attribute alias in getOutputAttributesMetadata()
+		std::wstring xpath; // expression text, taken from the value of an "attribute" option
+	};
+
+	std::vector<XPathAttribute> xpathAttributes;
+
+protected:
 
 	std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
+		findXmlnsInEnvironment();
+		findXmlnsInOptions();
+
 		std::vector<AttributeMetadata> oam;
-		oam.push_back({getAlias(0, L"xpath"), L"string"});
+
+		std::vector<Option> attributeOptions = getOptions(L"attribute");
+		for (int i = 0, limit = attributeOptions.size(); i < limit; i++) {
+			std::wstring alias = getAlias(i, attributeOptions[i].value);
+			xpathAttributes.push_back({alias, attributeOptions[i].value});
+			oam.push_back({alias, STRING});
+		}
+
 		return oam;
 	}
 
@@ -34,11 +70,19 @@
 			xmlpp::DomParser parser;
 			parser.parse_file(convertor.to_bytes(currentFile));
 			xmlpp::Element* root = parser.get_document()->get_root_node();
-			oa.push_back({L"XML OK", false});
-		} catch (...) {
-			oa.push_back({L"invalid XML", true});
+
+			for (XPathAttribute xpathAttribute : xpathAttributes) {
+				// TODO: support various modes like in XMLTableCommand
+				std::wstring result = convertor.from_bytes(root->eval_to_string(convertor.to_bytes(xpathAttribute.xpath), ns));
+				oa.push_back({result, false});
+			}
+		} catch (xmlpp::parse_error& e) {
+			for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({L"", true});
+			// invalid XML → xmlpp::parse_error → just skip this file
+			// invalid XPath → xmlpp::exception → failure
 		}
 
+
 		return oa;
 	}
 };