# HG changeset patch # User František Kučera # Date 1609344555 -3600 # Node ID 7f2d09c3b1de3e3c42a89c0bad5e93d4f268e197 # Parent e498b34663426b432bc5af9f7435d12d4ceec92c add --xml-attribute support (both input and output attributes) diff -r e498b3466342 -r 7f2d09c3b1de src/XPathHandler.h --- a/src/XPathHandler.h Wed Dec 30 10:33:24 2020 +0100 +++ b/src/XPathHandler.h Wed Dec 30 17:09:15 2020 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include <regex> #include @@ -83,19 +84,54 @@ return convertor.from_bytes(value); } + bool isXmlAttribute(const relpipe::common::type::StringX& attributeName) { + for (auto pattern : currentRelationConfiguration->xmlAttributes) if (std::regex_match(attributeName, std::wregex(pattern))) return true; + return false; + } + + const relpipe::common::type::StringX formatRawXML(const relpipe::common::type::StringX& rawXML) { + // TODO: move to a common library (used also in relpipe-in-xmltable) + std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); + return std::regex_replace(rawXML, pattern, L""); + } + + const relpipe::common::type::StringX serialize(xmlpp::Element* element, bool asXml) { + if (element) { + if (asXml) { + xmlpp::Document d; + d.create_root_node_by_import(element, true); + return formatRawXML(x2s(d.write_to_string())); + } else { + return element->get_child_text() ? 
x2s(element->get_child_text()->get_content()) : L""; + } + } else { + return L""; + } + } + + xmlpp::Element* findSingleElement(const xmlpp::NodeSet& nodeset) { + if (nodeset.empty()) return nullptr; + else if (nodeset.size() > 1) throw std::invalid_argument("XPath should find one or zero elements."); + else if (xmlpp::Element * element = dynamic_cast<xmlpp::Element*> (nodeset[0])) return element; + else if (nodeset[0]->get_path() == "/") return findSingleElement(nodeset[0]->find("*")); // support also "/" not only "/*" expressions (return root element in both cases) + else throw std::invalid_argument("XPath should find an element, not other kinds of nodes."); + } + void writeInputAttributes() { for (xmlpp::Node* attributeNode : recordElement->get_children()) { if (xmlpp::Element * attributeElement = dynamic_cast<xmlpp::Element*> (attributeNode)) { - auto value = attributeElement->get_child_text()->get_content(); - relationalWriter->writeAttribute(x2s(value)); + bool asXml = isXmlAttribute(x2s(attributeElement->get_attribute("name")->get_value())); + relationalWriter->writeAttribute(serialize(asXml ? dynamic_cast<xmlpp::Element*> (attributeElement->get_first_child()) : attributeElement, asXml)); } } } void writeOutputAttributes() { for (auto oa : currentRelationConfiguration->outputAttributes) { - auto value = recordElement->eval_to_string(s2x(oa.xpath), xmlns); - relationalWriter->writeAttribute(x2s(value)); + relpipe::common::type::StringX value; + if (isXmlAttribute(oa.name)) value = serialize(findSingleElement(recordElement->find(s2x(oa.xpath), xmlns)), true); + else value = x2s(recordElement->eval_to_string(s2x(oa.xpath), xmlns)); + relationalWriter->writeAttribute(value); } } @@ -146,10 +182,20 @@ if (currentRelationConfiguration) { relpipe::reader::handlers::AttributeMetadata attributeMetadata = currentReaderMetadata[currentAttributeIndex]; + // TODO: Parallel processing of records like in relpipe-in-filesystem? Or share common code with the XPath streamlet? 
(streamlets are parallelized) + xmlpp::Element* attributeElement = recordElement->add_child(xmlNameCodec.encode(s2x(attributeMetadata.getAttributeName()))); attributeElement->set_attribute("name", s2x(attributeMetadata.getAttributeName())); attributeElement->set_attribute("type", s2x(attributeMetadata.getTypeName())); - attributeElement->add_child_text(s2x(value)); + if (isXmlAttribute(attributeMetadata.getAttributeName())) { + if (value.size()) { + xmlpp::DomParser attributeParser; + attributeParser.parse_memory(s2x(value)); + attributeElement->import_node(attributeParser.get_document()->get_root_node(), true); + } + } else { + attributeElement->add_child_text(s2x(value)); + } if (currentAttributeIndex == 0) { recordElement->set_attribute("number", std::to_string(currentRecordNumber));