--- a/src/XPathHandler.h Wed Dec 30 10:33:24 2020 +0100
+++ b/src/XPathHandler.h Wed Dec 30 17:09:15 2020 +0100
@@ -21,6 +21,7 @@
#include <vector>
#include <codecvt>
#include <regex>
+#include <stdexcept>
#include <libxml++-2.6/libxml++/libxml++.h>
@@ -83,19 +84,54 @@
return convertor.from_bytes(value);
}
+ /**
+  * Tells whether an attribute should be handled as XML (a sub-tree) instead of plain text.
+  *
+  * @param attributeName name of the relational attribute
+  * @return true if the whole name matches at least one of the regular expressions
+  *         listed in currentRelationConfiguration->xmlAttributes, false otherwise
+  */
+ bool isXmlAttribute(const relpipe::common::type::StringX& attributeName) {
+     // iterate by const reference: the patterns are only read, so there is no need to copy each string
+     for (const auto& pattern : currentRelationConfiguration->xmlAttributes) {
+         if (std::regex_match(attributeName, std::wregex(pattern))) return true;
+     }
+     return false;
+ }
+
+ /**
+  * Cleans up a serialized XML document so that only the markup itself remains.
+  *
+  * Removes the <?xml version="1.0" encoding="UTF-8"?> declaration (including the line break
+  * that follows it) from the beginning and a line break from the end of the text.
+  *
+  * @param rawXML serialized XML document
+  * @return the text with the matched parts replaced by an empty string
+  */
+ const relpipe::common::type::StringX formatRawXML(const relpipe::common::type::StringX& rawXML) {
+     // TODO: move to a common library (used also in relpipe-in-xmltable)
+     // Compiled once and reused: constructing a std::wregex is expensive and the pattern never changes.
+     // The dot in the version number is escaped, so it matches only a literal "1.0".
+     static const std::wregex pattern(L"^<\\?xml version=\"1\\.0\" encoding=\"UTF-8\"\\?>\n|\n$");
+     return std::regex_replace(rawXML, pattern, L"");
+ }
+
+ /**
+  * Converts an element to the string value of a relational attribute.
+  *
+  * @param element element to be serialized; may be nullptr
+  * @param asXml true: the whole element is written as XML markup (passed through formatRawXML());
+  *              false: only the content of the text node returned by get_child_text() is used
+  * @return the serialized value; an empty string if the element is nullptr
+  *         or (in the text mode) if it has no child text
+  */
+ const relpipe::common::type::StringX serialize(xmlpp::Element* element, bool asXml) {
+     if (element == nullptr) return L"";
+
+     if (!asXml) {
+         auto* textNode = element->get_child_text();
+         return textNode ? x2s(textNode->get_content()) : L"";
+     }
+
+     // import the element (recursively) as the root of a temporary document, so it can be written out on its own
+     xmlpp::Document standalone;
+     standalone.create_root_node_by_import(element, true);
+     return formatRawXML(x2s(standalone.write_to_string()));
+ }
+
+ /**
+  * Picks the only element from the result of an XPath search.
+  *
+  * @param nodeset nodes found by the XPath expression
+  * @return the found element; nullptr if the node set is empty
+  * @throws std::invalid_argument if more than one node was found, or if the single node
+  *         is neither an element nor the node whose path is "/"
+  */
+ xmlpp::Element* findSingleElement(const xmlpp::NodeSet& nodeset) {
+     if (nodeset.empty()) return nullptr;
+     if (nodeset.size() > 1) throw std::invalid_argument("XPath should find one or zero elements.");
+
+     auto* onlyNode = nodeset[0];
+     if (auto* element = dynamic_cast<xmlpp::Element*> (onlyNode)) return element;
+
+     // support also "/" not only "/*" expressions (return root element in both cases)
+     if (onlyNode->get_path() == "/") return findSingleElement(onlyNode->find("*"));
+
+     throw std::invalid_argument("XPath should find an element, not other kinds of nodes.");
+ }
+
void writeInputAttributes() {
for (xmlpp::Node* attributeNode : recordElement->get_children()) {
if (xmlpp::Element * attributeElement = dynamic_cast<xmlpp::Element*> (attributeNode)) {
- auto value = attributeElement->get_child_text()->get_content();
- relationalWriter->writeAttribute(x2s(value));
+ // the value of the "name" attribute decides between the XML and the text mode
+ const bool asXml = isXmlAttribute(x2s(attributeElement->get_attribute("name")->get_value()));
+ // XML mode: serialize the first child node (only if it is an element); text mode: the attribute element itself
+ xmlpp::Element* valueElement = asXml ? dynamic_cast<xmlpp::Element*> (attributeElement->get_first_child()) : attributeElement;
+ relationalWriter->writeAttribute(serialize(valueElement, asXml));
}
}
}
void writeOutputAttributes() {
for (auto oa : currentRelationConfiguration->outputAttributes) {
- auto value = recordElement->eval_to_string(s2x(oa.xpath), xmlns);
- relationalWriter->writeAttribute(x2s(value));
+ if (isXmlAttribute(oa.name)) {
+ // XML attribute: the XPath has to select at most one element, which is written as markup
+ relationalWriter->writeAttribute(serialize(findSingleElement(recordElement->find(s2x(oa.xpath), xmlns)), true));
+ } else {
+ // ordinary attribute: the XPath expression is evaluated to its string value
+ relationalWriter->writeAttribute(x2s(recordElement->eval_to_string(s2x(oa.xpath), xmlns)));
+ }
}
}
@@ -146,10 +182,20 @@
if (currentRelationConfiguration) {
relpipe::reader::handlers::AttributeMetadata attributeMetadata = currentReaderMetadata[currentAttributeIndex];
+ // TODO: Parallel processing of records like in relpipe-in-filesystem? Or share common code with the XPath streamlet? (streamlets are parallelized)
+
xmlpp::Element* attributeElement = recordElement->add_child(xmlNameCodec.encode(s2x(attributeMetadata.getAttributeName())));
attributeElement->set_attribute("name", s2x(attributeMetadata.getAttributeName()));
attributeElement->set_attribute("type", s2x(attributeMetadata.getTypeName()));
- attributeElement->add_child_text(s2x(value));
+ if (isXmlAttribute(attributeMetadata.getAttributeName())) {
+ if (value.size()) {
+ xmlpp::DomParser attributeParser;
+ attributeParser.parse_memory(s2x(value));
+ attributeElement->import_node(attributeParser.get_document()->get_root_node(), true);
+ }
+ } else {
+ attributeElement->add_child_text(s2x(value));
+ }
if (currentAttributeIndex == 0) {
recordElement->set_attribute("number", std::to_string(currentRecordNumber));