support multiple modes of reading from XML: string, boolean, raw-xml, line-number, xpath v_0 v0.15
author František Kučera <franta-hg@frantovo.cz>
Sun, 05 Jan 2020 01:01:12 +0100
branch v_0
changeset 8 8730e2d0db0e
parent 7 ff69af3c67a3
child 9 b70e616178c0
support multiple modes of reading from XML: string, boolean, raw-xml, line-number, xpath
bash-completion.sh
src/CLIParser.h
src/Configuration.h
src/XMLTableCommand.h
--- a/bash-completion.sh	Thu Jan 02 23:31:44 2020 +0100
+++ b/bash-completion.sh	Sun Jan 05 01:01:12 2020 +0100
@@ -33,6 +33,14 @@
 		"false"
 	)
 
+	MODE=(
+		"string"
+		"boolean"
+		"raw-xml"
+		"line-number"
+		"xpath"
+	)
+
 	# FIXME: user must type " and then press TAB otherwise the completion is broken due to the : colon
 	#
 	# can be fixed by global modification of environment variable:
@@ -51,15 +59,22 @@
 	)
 
 
-	if   [[ "$w1" == "--relation"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
-	elif [[ "$w1" == "--relation"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
-	elif [[ "$w1" == "--records"       && "x$w0" == "x" ]];    then COMPREPLY=("'/'")
-	elif [[ "$w1" == "--attribute"     && "x$w0" == "x" ]];    then COMPREPLY=("''")
-	elif [[ "$w2" == "--attribute"                      ]];    then COMPREPLY=($(compgen -W "${DATA_TYPE[*]}" -- "$w0"))
-	elif [[ "$w3" == "--attribute"     && "x$w0" == "x" ]];    then COMPREPLY=("''")
-	elif [[ "$w1" == "--namespace"     && "x$w0" == "x" ]];    then COMPREPLY=("''")
-	elif [[ "$w2" == "--namespace"                      ]];    then COMPREPLY=($(compgen -W "${XMLNS[*]}" -- "$w0"))
-	elif [[ "$w1" == "--xinclude"                       ]];    then COMPREPLY=($(compgen -W "${XINCLUDE[*]}" -- "$w0"))
+	if   [[ "$w1" == "--relation"                      && "x$w0" == "x" ]];    then COMPREPLY=("''")
+	elif [[ "$w1" == "--records"                       && "x$w0" == "x" ]];    then COMPREPLY=("'/'")
+	elif [[ "$w1" == "--attribute"                     && "x$w0" == "x" ]];    then COMPREPLY=("''")
+	elif [[ "$w2" == "--attribute"                                      ]];    then COMPREPLY=($(compgen -W "${DATA_TYPE[*]}" -- "$w0"))
+	elif [[ "$w3" == "--attribute"                     && "x$w0" == "x" ]];    then COMPREPLY=("''")
+	elif [[ "$w1" == "--namespace"                     && "x$w0" == "x" ]];    then COMPREPLY=("''")
+	elif [[ "$w2" == "--namespace"                                      ]];    then COMPREPLY=($(compgen -W "${XMLNS[*]}" -- "$w0"))
+	elif [[ "$w1" == "--xinclude"                                       ]];    then COMPREPLY=($(compgen -W "${XINCLUDE[*]}" -- "$w0"))
+	elif [[ "$w1" == "--mode"                                           ]];    then COMPREPLY=($(compgen -W "${MODE[*]}" -- "$w0"))
+	elif [[ "$w1" == "--raw-xml-nodelist-wrapper"                       ]];    then COMPREPLY=("'xml'")
+	elif [[ "$w2" == "--raw-xml-nodelist-wrapper"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
+	elif [[ "$w3" == "--raw-xml-nodelist-wrapper"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
+
+	elif [[ "$w1" == "--raw-xml-attribute-wrapper"                       ]];    then COMPREPLY=("'attribute'")
+	elif [[ "$w2" == "--raw-xml-attribute-wrapper"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
+	elif [[ "$w3" == "--raw-xml-attribute-wrapper"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
 	else
 		OPTIONS=(
 			"--namespace"
@@ -68,6 +83,9 @@
 			"--name-is-xpath"
 			"--attribute"
 			"--xinclude"
+			"--mode"
+			"--raw-xml-nodelist-wrapper"
+			"--raw-xml-attribute-wrapper"
 		)
 		COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0"))
 	fi
--- a/src/CLIParser.h	Thu Jan 02 23:31:44 2020 +0100
+++ b/src/CLIParser.h	Sun Jan 05 01:01:12 2020 +0100
@@ -68,6 +68,9 @@
 	static const string_t OPTION_RECORDS;
 	static const string_t OPTION_ATTRIBUTE;
 	static const string_t OPTION_XINCLUDE;
+	static const string_t OPTION_MODE;
+	static const string_t OPTION_RAW_XML_NODELIST_WRAPPER;
+	static const string_t OPTION_RAW_XML_ATTRIBUTE_WRAPPER;
 
 	Configuration parse(const std::vector<string_t>& arguments) {
 		Configuration c;
@@ -90,10 +93,32 @@
 				currentRelation.xpath = readNext(arguments, i);
 			} else if (option == OPTION_ATTRIBUTE) {
 				AttributeRecipe attribute;
+				attribute.mode = currentRelation.mode;
+				attribute.rawXmlNodeListWrapper = currentRelation.rawXmlNodeListWrapper;
+				attribute.rawXmlAttributeWrapper = currentRelation.rawXmlAttributeWrapper;
 				attribute.name = readNext(arguments, i);
 				attribute.type = parseTypeId(readNext(arguments, i));
 				attribute.xpath = readNext(arguments, i);
 				currentRelation.attributes.push_back(attribute);
+			} else if (option == OPTION_MODE) {
+				string_t modeName = readNext(arguments, i);
+				Mode mode;
+				if (modeName == L"string") mode = Mode::STRING;
+				else if (modeName == L"boolean") mode = Mode::BOOLEAN;
+				else if (modeName == L"raw-xml") mode = Mode::RAW_XML;
+				else if (modeName == L"line-number") mode = Mode::LINE_NUMBER;
+				else if (modeName == L"xpath") mode = Mode::XPATH;
+				else throw relpipe::cli::RelpipeCLIException(L"Unsupported mode: " + modeName, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
+				if (currentRelation.attributes.size()) currentRelation.attributes.back().mode = mode;
+				else currentRelation.mode = mode;
+			} else if (option == OPTION_RAW_XML_NODELIST_WRAPPER) {
+				XmlElementSkeleton w = {readNext(arguments, i), readNext(arguments, i), readNext(arguments, i)};
+				if (currentRelation.attributes.size()) currentRelation.attributes.back().rawXmlNodeListWrapper = w;
+				else currentRelation.rawXmlNodeListWrapper = w;
+			} else if (option == OPTION_RAW_XML_ATTRIBUTE_WRAPPER) {
+				XmlElementSkeleton w = {readNext(arguments, i), readNext(arguments, i), readNext(arguments, i)};
+				if (currentRelation.attributes.size()) currentRelation.attributes.back().rawXmlAttributeWrapper = w;
+				else currentRelation.rawXmlAttributeWrapper = w;
 			} else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
 		}
 		addRelation(c, currentRelation); // last relation
@@ -111,6 +136,9 @@
 const string_t CLIParser::OPTION_RECORDS = L"--records";
 const string_t CLIParser::OPTION_ATTRIBUTE = L"--attribute";
 const string_t CLIParser::OPTION_XINCLUDE = L"--xinclude";
+const string_t CLIParser::OPTION_MODE = L"--mode";
+const string_t CLIParser::OPTION_RAW_XML_NODELIST_WRAPPER = L"--raw-xml-nodelist-wrapper";
+const string_t CLIParser::OPTION_RAW_XML_ATTRIBUTE_WRAPPER = L"--raw-xml-attribute-wrapper";
 
 }
 }
--- a/src/Configuration.h	Thu Jan 02 23:31:44 2020 +0100
+++ b/src/Configuration.h	Sun Jan 05 01:01:12 2020 +0100
@@ -25,6 +25,31 @@
 namespace in {
 namespace xmltable {
 
+enum class Mode { // How the result of an attribute's XPath is turned into a value; chosen with the --mode CLI option.
+	STRING, // XPath evaluated to a string; this is the default for both relations and attributes
+	BOOLEAN, // XPath evaluated to a boolean and written as "true" or "false"
+	// TODO: support also XML number, when we have a rational or decimal numbers in Relational pipes
+	RAW_XML, // matched node(s) serialized as an XML fragment
+	LINE_NUMBER, // get_line() of the first matched node; an empty string when nothing matches
+	XPATH // get_path() of the first matched node; an empty string when nothing matches
+};
+
+class XmlElementSkeleton { // Name and namespace of a wrapper element that raw-xml mode creates around imported nodes.
+public:
+	relpipe::writer::string_t name; // element name; an empty name on the node-list wrapper means "no wrapper"
+	relpipe::writer::string_t uri; // passed as the namespace URI argument when the wrapper element is created
+	relpipe::writer::string_t prefix; // passed as the namespace prefix argument when the wrapper element is created
+
+	XmlElementSkeleton() { // leaves name, uri and prefix as empty strings
+	}
+
+	XmlElementSkeleton(relpipe::writer::string_t name, relpipe::writer::string_t uri = L"", relpipe::writer::string_t prefix = L"") : name(name), uri(uri), prefix(prefix) { // intentionally not explicit: RelationConfiguration initializes a member with = {L"attribute"}
+	}
+
+	virtual ~XmlElementSkeleton() { // virtual although no subclass is visible in this changeset
+	}
+};
+
 class AttributeRecipe {
 public:
 
@@ -34,6 +59,10 @@
 	relpipe::writer::string_t name;
 	relpipe::writer::TypeId type;
 	relpipe::writer::string_t xpath;
+	Mode mode = Mode::STRING;
+	XmlElementSkeleton rawXmlNodeListWrapper;
+	XmlElementSkeleton rawXmlAttributeWrapper;
+
 };
 
 class RelationConfiguration {
@@ -46,7 +75,11 @@
 	relpipe::writer::boolean_t nameIsXPath = false;
 	relpipe::writer::string_t xpath;
 	std::vector<AttributeRecipe> attributes;
-	
+
+	// Defaults/templates for AttributeRecipe:	
+	Mode mode = Mode::STRING;
+	XmlElementSkeleton rawXmlNodeListWrapper;
+	XmlElementSkeleton rawXmlAttributeWrapper = {L"attribute"};
 };
 
 class Configuration {
--- a/src/XMLTableCommand.h	Thu Jan 02 23:31:44 2020 +0100
+++ b/src/XMLTableCommand.h	Sun Jan 05 01:01:12 2020 +0100
@@ -22,6 +22,8 @@
 #include <sstream>
 #include <vector>
 #include <algorithm>
+#include <exception>
+#include <regex>
 
 #include <libxml++-2.6/libxml++/libxml++.h>
 
@@ -39,6 +41,45 @@
 private:
 	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
 
+	string_t formatRawXML(string_t rawXML) { // Returns rawXML without the leading XML declaration line and without a trailing newline.
+		std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); // two alternatives: the exact UTF-8 declaration plus its newline, or a newline at the end; the regex is rebuilt on every call
+		return std::regex_replace(rawXML, pattern, L""); // every match is replaced by an empty string
+	}
+
+	void importNode(xmlpp::Node* parent, xmlpp::Node* child, AttributeRecipe attributeRecipe) { // Imports child under parent; attributeRecipe (taken by value) supplies the attribute wrapper.
+		if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns( // an attribute node is not imported directly: a new wrapper element is added to parent and the attribute is imported into it
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), // wrapper element name, converted from wide string to bytes
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), // wrapper namespace URI
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); // wrapper namespace prefix
+		else parent->import_node(child, true); // every other node type is imported straight under parent; NOTE(review): second argument is presumably the "recursive" flag of libxml++ import_node — confirm
+	}
+
+	void importNode(xmlpp::Document* document, xmlpp::Node* child, AttributeRecipe attributeRecipe) { // Document overload: makes child (or a wrapper holding it) the root of document.
+		if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node( // an attribute node gets a wrapper element as the document root and is imported into it
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name), // wrapper element name, converted from wide string to bytes
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri), // wrapper namespace URI
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child); // wrapper namespace prefix
+		else document->create_root_node_by_import(child, true); // every other node type becomes the root itself; NOTE(review): second argument is presumably the "recursive" flag — confirm against libxml++
+	}
+
+	string_t toRawXML(xmlpp::Node* parent, AttributeRecipe attributeRecipe, xmlpp::Node::PrefixNsMap ns) { // Serializes the nodes that attributeRecipe.xpath selects from parent into an XML string.
+		xmlpp::Document d; // temporary document that receives the imported nodes
+		xmlpp::NodeSet nodes = parent->find(convertor.to_bytes(attributeRecipe.xpath), ns); // ns is handed to find() together with the XPath
+
+		if (attributeRecipe.rawXmlNodeListWrapper.name.size()) { // a node-list wrapper is configured: any number of matches, including none, is accepted
+			d.create_root_node( // the wrapper element becomes the document root
+					convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.name),
+					convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.uri),
+					convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.prefix));
+			for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node, attributeRecipe); // each match is imported under the wrapper
+		} else { // no wrapper: exactly one match is serialized, none yields an empty string, several are an error
+			if (nodes.size() == 1) importNode(&d, nodes[0], attributeRecipe); // the single match (or its attribute wrapper) becomes the document root
+			else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception
+			else return L""; // TODO: null
+		}
+		return formatRawXML(convertor.from_bytes(d.write_to_string())); // serialized document is converted back to a wide string and passed through formatRawXML
+	}
+
 public:
 
 	void process(std::istream& input, std::ostream& output, Configuration& configuration) {
@@ -64,7 +105,24 @@
 			for (xmlpp::Node* n : root->find(convertor.to_bytes(r.xpath), ns)) {
 				for (AttributeRecipe a : r.attributes) {
 					// TODO: convert to bytes only once
-					writer->writeAttribute(convertor.from_bytes(n->eval_to_string(convertor.to_bytes(a.xpath), ns)));
+					std::string attributeXpath = convertor.to_bytes(a.xpath);
+					if (a.mode == Mode::STRING) {
+						writer->writeAttribute(convertor.from_bytes(n->eval_to_string(attributeXpath, ns)));
+					} else if (a.mode == Mode::BOOLEAN) {
+						writer->writeAttribute(n->eval_to_boolean(attributeXpath, ns) ? L"true" : L"false");
+					} else if (a.mode == Mode::LINE_NUMBER) {
+						xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns);
+						string_t line = attributeNodes.size() ? std::to_wstring(attributeNodes[0]->get_line()) : L""; // TODO: null
+						writer->writeAttribute(line);
+					} else if (a.mode == Mode::XPATH) {
+						xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns);
+						string_t line = attributeNodes.size() ? convertor.from_bytes(attributeNodes[0]->get_path()) : L""; // TODO: null
+						writer->writeAttribute(line);
+					} else if (a.mode == Mode::RAW_XML) {
+						writer->writeAttribute(toRawXML(n, a, ns));
+					} else {
+						throw logic_error("Unsupported mode."); // should never happer, TODO: better relpipe exception
+					}
 				}
 			}
 		}