streamlet-examples/xpath.cpp
branchv_0
changeset 77 a680bcd946cd
parent 75 ecbf6504915c
child 78 5a63bf594f53
equal deleted inserted replaced
76:679f1e793ee3 77:a680bcd946cd
    38  *  - boolean
    38  *  - boolean
    39  *  - raw-xml
    39  *  - raw-xml
    40  *  - line-number
    40  *  - line-number
    41  *  - xpath
    41  *  - xpath
    42  * 
    42  * 
       
    43  * The raw-xml mode provides a portion of the original XML defined by the XPath and can be further parametrized by options:
       
    44  *  - raw-xml-nodelist-wrapper-name
       
    45  *  - raw-xml-nodelist-wrapper-uri
       
    46  *  - raw-xml-nodelist-wrapper-prefix
       
    47  *  - raw-xml-attribute-wrapper-name
       
    48  *  - raw-xml-attribute-wrapper-uri
       
    49  *  - raw-xml-attribute-wrapper-prefix
       
    50  * 
    43  * TODO: more OOP, move to separate repository, proper CMake project, clean-up, stabilize API
    51  * TODO: more OOP, move to separate repository, proper CMake project, clean-up, stabilize API
    44  */
    52  */
    45 class XPathStreamlet : public Streamlet {
    53 class XPathStreamlet : public Streamlet {
    46 private:
    54 private:
    47 	xmlpp::Node::PrefixNsMap ns;
    55 	xmlpp::Node::PrefixNsMap ns;
    53 	}
    61 	}
    54 
    62 
    55 	void findXmlnsInOptions() {
    63 	void findXmlnsInOptions() {
    56 		for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[toBytes(o.nameMatch[1])] = toBytes(o.value);
    64 		for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[toBytes(o.nameMatch[1])] = toBytes(o.value);
    57 		for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[toBytes(o.valueMatch[1])] = toBytes(o.valueMatch[2]);
    65 		for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[toBytes(o.valueMatch[1])] = toBytes(o.valueMatch[2]);
       
    66 	}
       
    67 
       
    68 	std::wstring rawXmlNodeListWrapperName;
       
    69 	std::wstring rawXmlNodeListWrapperUri;
       
    70 	std::wstring rawXmlNodeListWrapperPrefix;
       
    71 
       
    72 	std::wstring rawXmlAttributeWrapperName = L"attribute";
       
    73 	std::wstring rawXmlAttributeWrapperUri;
       
    74 	std::wstring rawXmlAttributeWrapperPrefix;
       
    75 
       
    76 	void findRawXmlOptions() {
       
    77 		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-name")) rawXmlNodeListWrapperName = o.value;
       
    78 		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-uri")) rawXmlNodeListWrapperUri = o.value;
       
    79 		for (Option o : getOptions(L"raw-xml-nodelist-wrapper-prefix")) rawXmlNodeListWrapperPrefix = o.value;
       
    80 		for (Option o : getOptions(L"raw-xml-attribute-wrapper-name")) rawXmlAttributeWrapperName = o.value;
       
    81 		for (Option o : getOptions(L"raw-xml-attribute-wrapper-uri")) rawXmlAttributeWrapperUri = o.value;
       
    82 		for (Option o : getOptions(L"raw-xml-attribute-wrapper-prefix")) rawXmlAttributeWrapperPrefix = o.value;
    58 	}
    83 	}
    59 
    84 
    60 	// Modes should share the logic of relpipe-in-xmltable
    85 	// Modes should share the logic of relpipe-in-xmltable
    61 
    86 
    62 	enum class Mode {
    87 	enum class Mode {
    81 		if (mode == Mode::BOOLEAN) return BOOLEAN;
   106 		if (mode == Mode::BOOLEAN) return BOOLEAN;
    82 		else if (mode == Mode::LINE_NUMBER) return INTEGER;
   107 		else if (mode == Mode::LINE_NUMBER) return INTEGER;
    83 		else return STRING;
   108 		else return STRING;
    84 	}
   109 	}
    85 
   110 
       
   111 	std::wstring formatRawXML(std::wstring rawXML) {
       
   112 		std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$");
       
   113 		return std::regex_replace(rawXML, pattern, L"");
       
   114 	}
       
   115 
       
   116 	void importNode(xmlpp::Node* parent, xmlpp::Node* child) {
       
   117 		if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns(
       
   118 				toBytes(rawXmlAttributeWrapperName),
       
   119 				toBytes(rawXmlAttributeWrapperUri),
       
   120 				toBytes(rawXmlAttributeWrapperPrefix))->import_node(child);
       
   121 		else parent->import_node(child, true);
       
   122 	}
       
   123 
       
   124 	void importNode(xmlpp::Document* document, xmlpp::Node* child) {
       
   125 		if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node(
       
   126 				toBytes(rawXmlAttributeWrapperName),
       
   127 				toBytes(rawXmlAttributeWrapperUri),
       
   128 				toBytes(rawXmlAttributeWrapperPrefix))->import_node(child);
       
   129 		else document->create_root_node_by_import(child, true);
       
   130 	}
       
   131 
       
   132 	std::wstring toRawXML(xmlpp::Node* parent, std::string xpath, xmlpp::Node::PrefixNsMap ns) {
       
   133 		xmlpp::Document d;
       
   134 		xmlpp::NodeSet nodes = parent->find(xpath, ns);
       
   135 
       
   136 		if (rawXmlNodeListWrapperName.size()) {
       
   137 			d.create_root_node(
       
   138 					toBytes(rawXmlNodeListWrapperName),
       
   139 					toBytes(rawXmlNodeListWrapperUri),
       
   140 					toBytes(rawXmlNodeListWrapperPrefix));
       
   141 			for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node);
       
   142 		} else {
       
   143 			if (nodes.size() == 1) importNode(&d, nodes[0]);
       
   144 			else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception
       
   145 			else return L"";
       
   146 		}
       
   147 		return formatRawXML(fromBytes(d.write_to_string()));
       
   148 	}
       
   149 
    86 	class XPathAttribute {
   150 	class XPathAttribute {
    87 	public:
   151 	public:
    88 
   152 
    89 		std::wstring name;
   153 		std::wstring name;
    90 		std::wstring xpath;
   154 		std::wstring xpath;
    96 protected:
   160 protected:
    97 
   161 
    98 	std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
   162 	std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
    99 		findXmlnsInEnvironment();
   163 		findXmlnsInEnvironment();
   100 		findXmlnsInOptions();
   164 		findXmlnsInOptions();
       
   165 		findRawXmlOptions();
   101 
   166 
   102 		std::vector<AttributeMetadata> oam;
   167 		std::vector<AttributeMetadata> oam;
   103 
   168 
   104 		std::vector<Option> modeOptions = getOptions(L"mode");
   169 		std::vector<Option> modeOptions = getOptions(L"mode");
   105 		std::vector<Option> attributeOptions = getOptions(L"attribute");
   170 		std::vector<Option> attributeOptions = getOptions(L"attribute");
   137 				} else if (xpathAttribute.mode == Mode::XPATH) {
   202 				} else if (xpathAttribute.mode == Mode::XPATH) {
   138 					xmlpp::NodeSet attributeNodes = root->find(xpath, ns);
   203 					xmlpp::NodeSet attributeNodes = root->find(xpath, ns);
   139 					if (attributeNodes.size()) result = fromBytes(attributeNodes[0]->get_path());
   204 					if (attributeNodes.size()) result = fromBytes(attributeNodes[0]->get_path());
   140 					else isNull = true;
   205 					else isNull = true;
   141 				} else if (xpathAttribute.mode == Mode::RAW_XML) {
   206 				} else if (xpathAttribute.mode == Mode::RAW_XML) {
   142 					throw std::logic_error("Raw XML mode is not yet implemented."); // TODO: implement also RAW_XML
   207 					result = toRawXML(root, xpath, ns);
   143 				} else {
   208 				} else {
   144 					throw std::logic_error("Unsupported mode."); // should never happer
   209 					throw std::logic_error("Unsupported mode."); // should never happer
   145 				}
   210 				}
   146 
   211 
   147 				oa.push_back({result, isNull});
   212 				oa.push_back({result, isNull});