--- a/streamlet-examples/xpath.cpp Thu Jan 30 14:40:52 2020 +0100
+++ b/streamlet-examples/xpath.cpp Thu Jan 30 18:04:10 2020 +0100
@@ -40,6 +40,14 @@
* - line-number
* - xpath
*
+ * The raw-xml mode returns the portion of the original XML selected by the XPath expression and can be further parametrized by options:
+ * - raw-xml-nodelist-wrapper-name
+ * - raw-xml-nodelist-wrapper-uri
+ * - raw-xml-nodelist-wrapper-prefix
+ * - raw-xml-attribute-wrapper-name
+ * - raw-xml-attribute-wrapper-uri
+ * - raw-xml-attribute-wrapper-prefix
+ *
* TODO: more OOP, move to separate repository, proper CMake project, clean-up, stabilize API
*/
class XPathStreamlet : public Streamlet {
@@ -57,6 +65,23 @@
for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[toBytes(o.valueMatch[1])] = toBytes(o.valueMatch[2]);
}
+ std::wstring rawXmlNodeListWrapperName; // element name of the optional root element wrapping all found nodes; empty (default) = no wrapper, see toRawXML()
+ std::wstring rawXmlNodeListWrapperUri; // passed as the second argument of create_root_node() for the node-list wrapper (namespace URI, judging by the name)
+ std::wstring rawXmlNodeListWrapperPrefix; // passed as the third argument of create_root_node() for the node-list wrapper (namespace prefix, judging by the name)
+
+ std::wstring rawXmlAttributeWrapperName = L"attribute"; // name of the element created to carry an attribute node found by the XPath, see importNode()
+ std::wstring rawXmlAttributeWrapperUri; // second argument used when creating the attribute wrapper element (namespace URI, judging by the name)
+ std::wstring rawXmlAttributeWrapperPrefix; // third argument used when creating the attribute wrapper element (namespace prefix, judging by the name)
+
+ void findRawXmlOptions() { // Copies the raw-xml-* option values into the corresponding member fields; fields keep their defaults when an option is absent.
+ for (Option o : getOptions(L"raw-xml-nodelist-wrapper-name")) rawXmlNodeListWrapperName = o.value; // each loop overwrites the field per returned option, so the last one iterated wins
+ for (Option o : getOptions(L"raw-xml-nodelist-wrapper-uri")) rawXmlNodeListWrapperUri = o.value;
+ for (Option o : getOptions(L"raw-xml-nodelist-wrapper-prefix")) rawXmlNodeListWrapperPrefix = o.value;
+ for (Option o : getOptions(L"raw-xml-attribute-wrapper-name")) rawXmlAttributeWrapperName = o.value; // replaces the L"attribute" default
+ for (Option o : getOptions(L"raw-xml-attribute-wrapper-uri")) rawXmlAttributeWrapperUri = o.value;
+ for (Option o : getOptions(L"raw-xml-attribute-wrapper-prefix")) rawXmlAttributeWrapperPrefix = o.value;
+ }
+
// Modes should share the logic of relpipe-in-xmltable
enum class Mode {
@@ -83,6 +108,45 @@
else return STRING;
}
+ std::wstring formatRawXML(std::wstring rawXML) { // Returns rawXML without the XML declaration line and without the newline matched before `$`.
+ std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); // two alternatives: exactly this declaration followed by a newline, or a newline right before `$`; NOTE(review): the regex is rebuilt on every call
+ return std::regex_replace(rawXML, pattern, L""); // every match is replaced by an empty string; a declaration with different version/encoding text is left untouched
+ }
+
+ void importNode(xmlpp::Node* parent, xmlpp::Node* child) { // Imports child under parent; attribute nodes are first wrapped in a newly created element.
+ if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns( // attribute node: create a wrapper child element under parent ...
+ toBytes(rawXmlAttributeWrapperName),
+ toBytes(rawXmlAttributeWrapperUri),
+ toBytes(rawXmlAttributeWrapperPrefix))->import_node(child); // ... and import the attribute into that wrapper
+ else parent->import_node(child, true); // any other node type is imported directly under parent (second argument presumably the recursive flag - see libxml++ docs)
+ }
+
+ void importNode(xmlpp::Document* document, xmlpp::Node* child) { // Makes child the content of document's root; attribute nodes get a wrapper root element.
+ if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node( // attribute node: create the wrapper element as the document root ...
+ toBytes(rawXmlAttributeWrapperName),
+ toBytes(rawXmlAttributeWrapperUri),
+ toBytes(rawXmlAttributeWrapperPrefix))->import_node(child); // ... and import the attribute into that root element
+ else document->create_root_node_by_import(child, true); // any other node type becomes the root node itself (second argument presumably the recursive flag - see libxml++ docs)
+ }
+
+ std::wstring toRawXML(xmlpp::Node* parent, std::string xpath, xmlpp::Node::PrefixNsMap ns) { // Serializes the nodes found by xpath (evaluated on parent with the prefix map ns) into an XML string.
+ xmlpp::Document d; // temporary document that receives the imported nodes
+ xmlpp::NodeSet nodes = parent->find(xpath, ns);
+
+ if (rawXmlNodeListWrapperName.size()) { // wrapper configured: any number of nodes (including none) is placed under one wrapper root element
+ d.create_root_node(
+ toBytes(rawXmlNodeListWrapperName),
+ toBytes(rawXmlNodeListWrapperUri),
+ toBytes(rawXmlNodeListWrapperPrefix));
+ for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node);
+ } else { // no wrapper: exactly one node may become the document root
+ if (nodes.size() == 1) importNode(&d, nodes[0]);
+ else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception
+ else return L""; // nothing found: return an empty string without serializing the empty document
+ }
+ return formatRawXML(fromBytes(d.write_to_string())); // serialize the document, then pass it through formatRawXML() to drop the XML declaration and trailing newline
+ }
+
class XPathAttribute {
public:
@@ -98,6 +162,7 @@
std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
findXmlnsInEnvironment();
findXmlnsInOptions();
+ findRawXmlOptions();
std::vector<AttributeMetadata> oam;
@@ -139,7 +204,7 @@
if (attributeNodes.size()) result = fromBytes(attributeNodes[0]->get_path());
else isNull = true;
} else if (xpathAttribute.mode == Mode::RAW_XML) {
- throw std::logic_error("Raw XML mode is not yet implemented."); // TODO: implement also RAW_XML
+ result = toRawXML(root, xpath, ns);
} else {
throw std::logic_error("Unsupported mode."); // should never happer
}