streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable) v_0
author František Kučera <franta-hg@frantovo.cz>
Thu, 30 Jan 2020 18:04:10 +0100
branch v_0
changeset 77 a680bcd946cd
parent 76 679f1e793ee3
child 78 5a63bf594f53
streamlet examples: xpath: RAW XML mode (adapted from relpipe-in-xmltable)
streamlet-examples/xpath.cpp
--- a/streamlet-examples/xpath.cpp	Thu Jan 30 14:40:52 2020 +0100
+++ b/streamlet-examples/xpath.cpp	Thu Jan 30 18:04:10 2020 +0100
@@ -40,6 +40,14 @@
  *  - line-number
  *  - xpath
  * 
+ * The raw-xml mode provides a portion of the original XML defined by the XPath and can be further parametrized by options:
+ *  - raw-xml-nodelist-wrapper-name
+ *  - raw-xml-nodelist-wrapper-uri
+ *  - raw-xml-nodelist-wrapper-prefix
+ *  - raw-xml-attribute-wrapper-name
+ *  - raw-xml-attribute-wrapper-uri
+ *  - raw-xml-attribute-wrapper-prefix
+ * 
  * TODO: more OOP, move to separate repository, proper CMake project, clean-up, stabilize API
  */
 class XPathStreamlet : public Streamlet {
@@ -57,6 +65,23 @@
 		for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[toBytes(o.valueMatch[1])] = toBytes(o.valueMatch[2]);
 	}
 
+	std::wstring rawXmlNodeListWrapperName; // option raw-xml-nodelist-wrapper-name; empty by default, in which case toRawXML() adds no wrapper element
+	std::wstring rawXmlNodeListWrapperUri; // option raw-xml-nodelist-wrapper-uri; passed to create_root_node() together with the wrapper name
+	std::wstring rawXmlNodeListWrapperPrefix; // option raw-xml-nodelist-wrapper-prefix; passed to create_root_node() together with the wrapper name
+
+	std::wstring rawXmlAttributeWrapperName = L"attribute"; // option raw-xml-attribute-wrapper-name; name of the element created around an imported attribute node
+	std::wstring rawXmlAttributeWrapperUri; // option raw-xml-attribute-wrapper-uri; passed along with the attribute wrapper name
+	std::wstring rawXmlAttributeWrapperPrefix; // option raw-xml-attribute-wrapper-prefix; passed along with the attribute wrapper name
+
+	void findRawXmlOptions() { // copies the raw-xml-* options into the corresponding member fields
+		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-name")) rawXmlNodeListWrapperName = o.value; // the loop overwrites the field each time, so the last returned option wins; with no option the field keeps its initial value
+		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-uri")) rawXmlNodeListWrapperUri = o.value;
+		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-prefix")) rawXmlNodeListWrapperPrefix = o.value;
+		for (Option o : getOptions(L"raw-xml-attribute-wrapper-name")) rawXmlAttributeWrapperName = o.value; // replaces the initial value L"attribute" when the option is present
+		for (Option o : getOptions(L"raw-xml-attribute-wrapper-uri")) rawXmlAttributeWrapperUri = o.value;
+		for (Option o : getOptions(L"raw-xml-attribute-wrapper-prefix")) rawXmlAttributeWrapperPrefix = o.value;
+	}
+
 	// Modes should share the logic of relpipe-in-xmltable
 
 	enum class Mode {
@@ -83,6 +108,45 @@
 		else return STRING;
 	}
 
+	std::wstring formatRawXML(std::wstring rawXML) { // returns rawXML with every match of the pattern below removed
+		std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); // two alternatives: this exact XML declaration plus a newline at the start, or a newline at the end
+		return std::regex_replace(rawXML, pattern, L""); // matches are replaced by an empty string; the rest of the text is kept as is
+	}
+
+	void importNode(xmlpp::Node* parent, xmlpp::Node* child) { // imports child under parent; an attribute node is imported into a newly added wrapper element instead
+		if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns( // attribute node: first add a wrapper element built from the rawXmlAttributeWrapper* fields
+				toBytes(rawXmlAttributeWrapperName),
+				toBytes(rawXmlAttributeWrapperUri),
+				toBytes(rawXmlAttributeWrapperPrefix))->import_node(child); // then import the attribute into that new element
+		else parent->import_node(child, true); // any other node: import directly under parent; NOTE(review): the second argument is presumably the "recursive" flag of libxml++ - confirm
+	}
+
+	void importNode(xmlpp::Document* document, xmlpp::Node* child) { // makes child the content of the document root; an attribute node gets a wrapper element as the root
+		if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node( // attribute node: the root is a wrapper element built from the rawXmlAttributeWrapper* fields
+				toBytes(rawXmlAttributeWrapperName),
+				toBytes(rawXmlAttributeWrapperUri),
+				toBytes(rawXmlAttributeWrapperPrefix))->import_node(child); // then import the attribute into that root element
+		else document->create_root_node_by_import(child, true); // any other node: the imported node itself becomes the root; NOTE(review): second argument presumably means "recursive" - confirm
+	}
+
+	std::wstring toRawXML(xmlpp::Node* parent, std::string xpath, xmlpp::Node::PrefixNsMap ns) { // evaluates xpath on parent (with the ns prefix map) and returns the found nodes as XML text
+		xmlpp::Document d; // temporary document that receives the imported nodes and is serialized at the end
+		xmlpp::NodeSet nodes = parent->find(xpath, ns);
+
+		if (rawXmlNodeListWrapperName.size()) { // wrapper name set: all found nodes (possibly none) are placed under one wrapper root element
+			d.create_root_node(
+					toBytes(rawXmlNodeListWrapperName),
+					toBytes(rawXmlNodeListWrapperUri),
+					toBytes(rawXmlNodeListWrapperPrefix));
+			for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node);
+		} else { // no wrapper name: at most one node is accepted
+			if (nodes.size() == 1) importNode(&d, nodes[0]); // exactly one node: it becomes the document root (attributes get their wrapper element)
+			else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception
+			else return L""; // nothing found: return an empty string without serializing anything
+		}
+		return formatRawXML(fromBytes(d.write_to_string())); // serialize the temporary document and let formatRawXML() strip the XML declaration and trailing newline
+	}
+
 	class XPathAttribute {
 	public:
 
@@ -98,6 +162,7 @@
 	std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
 		findXmlnsInEnvironment();
 		findXmlnsInOptions();
+		findRawXmlOptions();
 
 		std::vector<AttributeMetadata> oam;
 
@@ -139,7 +204,7 @@
 					if (attributeNodes.size()) result = fromBytes(attributeNodes[0]->get_path());
 					else isNull = true;
 				} else if (xpathAttribute.mode == Mode::RAW_XML) {
-					throw std::logic_error("Raw XML mode is not yet implemented."); // TODO: implement also RAW_XML
+					result = toRawXML(root, xpath, ns);
 				} else {
 					throw std::logic_error("Unsupported mode."); // should never happer
 				}