src/XPathHandler.h
branchv_0
changeset 7 7f2d09c3b1de
parent 6 e498b3466342
child 8 9f95cfd68f25
equal deleted inserted replaced
6:e498b3466342 7:7f2d09c3b1de
    19 #include <memory>
    19 #include <memory>
    20 #include <string>
    20 #include <string>
    21 #include <vector>
    21 #include <vector>
    22 #include <codecvt>
    22 #include <codecvt>
    23 #include <regex>
    23 #include <regex>
       
    24 #include <stdexcept>
    24 
    25 
    25 #include <libxml++-2.6/libxml++/libxml++.h>
    26 #include <libxml++-2.6/libxml++/libxml++.h>
    26 
    27 
    27 #include <relpipe/common/type/typedefs.h>
    28 #include <relpipe/common/type/typedefs.h>
    28 #include <relpipe/reader/TypeId.h>
    29 #include <relpipe/reader/TypeId.h>
    81 
    82 
    82 	const relpipe::common::type::StringX x2s(const Glib::ustring& value) {
    83 	const relpipe::common::type::StringX x2s(const Glib::ustring& value) {
    83 		return convertor.from_bytes(value);
    84 		return convertor.from_bytes(value);
    84 	}
    85 	}
    85 
    86 
       
    87 	bool isXmlAttribute(const relpipe::common::type::StringX& attributeName) {
       
    88 		for (auto pattern : currentRelationConfiguration->xmlAttributes) if (std::regex_match(attributeName, std::wregex(pattern))) return true;
       
    89 		return false;
       
    90 	}
       
    91 
       
    92 	const relpipe::common::type::StringX formatRawXML(const relpipe::common::type::StringX& rawXML) {
       
    93 		// TODO: move to a common library (used also in relpipe-in-xmltable)
       
    94 		std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$");
       
    95 		return std::regex_replace(rawXML, pattern, L"");
       
    96 	}
       
    97 
       
    98 	const relpipe::common::type::StringX serialize(xmlpp::Element* element, bool asXml) {
       
    99 		if (element) {
       
   100 			if (asXml) {
       
   101 				xmlpp::Document d;
       
   102 				d.create_root_node_by_import(element, true);
       
   103 				return formatRawXML(x2s(d.write_to_string()));
       
   104 			} else {
       
   105 				return element->get_child_text() ? x2s(element->get_child_text()->get_content()) : L"";
       
   106 			}
       
   107 		} else {
       
   108 			return L"";
       
   109 		}
       
   110 	}
       
   111 
       
   112 	xmlpp::Element* findSingleElement(const xmlpp::NodeSet& nodeset) {
       
   113 		if (nodeset.empty()) return nullptr;
       
   114 		else if (nodeset.size() > 1) throw std::invalid_argument("XPath should find one or zero elements.");
       
   115 		else if (xmlpp::Element * element = dynamic_cast<xmlpp::Element*> (nodeset[0])) return element;
       
   116 		else if (nodeset[0]->get_path() == "/") return findSingleElement(nodeset[0]->find("*")); // support also "/" not only "/*" expressions (return root element in both cases)
       
   117 		else throw std::invalid_argument("XPath should find an element, not other kinds of nodes.");
       
   118 	}
       
   119 
    86 	void writeInputAttributes() {
   120 	void writeInputAttributes() {
    87 		for (xmlpp::Node* attributeNode : recordElement->get_children()) {
   121 		for (xmlpp::Node* attributeNode : recordElement->get_children()) {
    88 			if (xmlpp::Element * attributeElement = dynamic_cast<xmlpp::Element*> (attributeNode)) {
   122 			if (xmlpp::Element * attributeElement = dynamic_cast<xmlpp::Element*> (attributeNode)) {
    89 				auto value = attributeElement->get_child_text()->get_content();
   123 				bool asXml = isXmlAttribute(x2s(attributeElement->get_attribute("name")->get_value()));
    90 				relationalWriter->writeAttribute(x2s(value));
   124 				relationalWriter->writeAttribute(serialize(asXml ? dynamic_cast<xmlpp::Element*> (attributeElement->get_first_child()) : attributeElement, asXml));
    91 			}
   125 			}
    92 		}
   126 		}
    93 	}
   127 	}
    94 
   128 
    95 	void writeOutputAttributes() {
   129 	void writeOutputAttributes() {
    96 		for (auto oa : currentRelationConfiguration->outputAttributes) {
   130 		for (auto oa : currentRelationConfiguration->outputAttributes) {
    97 			auto value = recordElement->eval_to_string(s2x(oa.xpath), xmlns);
   131 			relpipe::common::type::StringX value;
    98 			relationalWriter->writeAttribute(x2s(value));
   132 			if (isXmlAttribute(oa.name)) value = serialize(findSingleElement(recordElement->find(s2x(oa.xpath), xmlns)), true);
       
   133 			else value = x2s(recordElement->eval_to_string(s2x(oa.xpath), xmlns));
       
   134 			relationalWriter->writeAttribute(value);
    99 		}
   135 		}
   100 	}
   136 	}
   101 
   137 
   102 public:
   138 public:
   103 
   139 
   144 
   180 
   145 	void attribute(const relpipe::common::type::StringX& value) override {
   181 	void attribute(const relpipe::common::type::StringX& value) override {
   146 		if (currentRelationConfiguration) {
   182 		if (currentRelationConfiguration) {
   147 			relpipe::reader::handlers::AttributeMetadata attributeMetadata = currentReaderMetadata[currentAttributeIndex];
   183 			relpipe::reader::handlers::AttributeMetadata attributeMetadata = currentReaderMetadata[currentAttributeIndex];
   148 
   184 
       
   185 			// TODO: Parallel processing of records like in relpipe-in-filesystem? Or share common code with the XPath streamlet? (streamlets are parallelized)
       
   186 
   149 			xmlpp::Element* attributeElement = recordElement->add_child(xmlNameCodec.encode(s2x(attributeMetadata.getAttributeName())));
   187 			xmlpp::Element* attributeElement = recordElement->add_child(xmlNameCodec.encode(s2x(attributeMetadata.getAttributeName())));
   150 			attributeElement->set_attribute("name", s2x(attributeMetadata.getAttributeName()));
   188 			attributeElement->set_attribute("name", s2x(attributeMetadata.getAttributeName()));
   151 			attributeElement->set_attribute("type", s2x(attributeMetadata.getTypeName()));
   189 			attributeElement->set_attribute("type", s2x(attributeMetadata.getTypeName()));
   152 			attributeElement->add_child_text(s2x(value));
   190 			if (isXmlAttribute(attributeMetadata.getAttributeName())) {
       
   191 				if (value.size()) {
       
   192 					xmlpp::DomParser attributeParser;
       
   193 					attributeParser.parse_memory(s2x(value));
       
   194 					attributeElement->import_node(attributeParser.get_document()->get_root_node(), true);
       
   195 				}
       
   196 			} else {
       
   197 				attributeElement->add_child_text(s2x(value));
       
   198 			}
   153 
   199 
   154 			if (currentAttributeIndex == 0) {
   200 			if (currentAttributeIndex == 0) {
   155 				recordElement->set_attribute("number", std::to_string(currentRecordNumber));
   201 				recordElement->set_attribute("number", std::to_string(currentRecordNumber));
   156 			}
   202 			}
   157 
   203