support multiple modes of reading from XML: string, boolean, raw-xml, line-number, xpath
--- a/bash-completion.sh Thu Jan 02 23:31:44 2020 +0100
+++ b/bash-completion.sh Sun Jan 05 01:01:12 2020 +0100
@@ -33,6 +33,14 @@
"false"
)
+ MODE=( # values offered as completions for the argument of the --mode option
+ "string"
+ "boolean"
+ "raw-xml"
+ "line-number"
+ "xpath"
+ )
+
# FIXME: user must type " and then press TAB otherwise the completion is broken due to the : colon
#
# can be fixed by global modification of environment variable:
@@ -51,15 +59,22 @@
)
- if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''")
- elif [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''")
- elif [[ "$w1" == "--records" && "x$w0" == "x" ]]; then COMPREPLY=("'/'")
- elif [[ "$w1" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
- elif [[ "$w2" == "--attribute" ]]; then COMPREPLY=($(compgen -W "${DATA_TYPE[*]}" -- "$w0"))
- elif [[ "$w3" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
- elif [[ "$w1" == "--namespace" && "x$w0" == "x" ]]; then COMPREPLY=("''")
- elif [[ "$w2" == "--namespace" ]]; then COMPREPLY=($(compgen -W "${XMLNS[*]}" -- "$w0"))
- elif [[ "$w1" == "--xinclude" ]]; then COMPREPLY=($(compgen -W "${XINCLUDE[*]}" -- "$w0"))
+ if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+ elif [[ "$w1" == "--records" && "x$w0" == "x" ]]; then COMPREPLY=("'/'")
+ elif [[ "$w1" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+ elif [[ "$w2" == "--attribute" ]]; then COMPREPLY=($(compgen -W "${DATA_TYPE[*]}" -- "$w0"))
+ elif [[ "$w3" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+ elif [[ "$w1" == "--namespace" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+ elif [[ "$w2" == "--namespace" ]]; then COMPREPLY=($(compgen -W "${XMLNS[*]}" -- "$w0"))
+ elif [[ "$w1" == "--xinclude" ]]; then COMPREPLY=($(compgen -W "${XINCLUDE[*]}" -- "$w0"))
+ elif [[ "$w1" == "--mode" ]]; then COMPREPLY=($(compgen -W "${MODE[*]}" -- "$w0"))
+ elif [[ "$w1" == "--raw-xml-nodelist-wrapper" ]]; then COMPREPLY=("'xml'")
+ elif [[ "$w2" == "--raw-xml-nodelist-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+ elif [[ "$w3" == "--raw-xml-nodelist-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+
+ elif [[ "$w1" == "--raw-xml-attribute-wrapper" ]]; then COMPREPLY=("'attribute'")
+ elif [[ "$w2" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+ elif [[ "$w3" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
else
OPTIONS=(
"--namespace"
@@ -68,6 +83,9 @@
"--name-is-xpath"
"--attribute"
"--xinclude"
+ "--mode"
+ "--raw-xml-nodelist-wrapper"
+ "--raw-xml-attribute-wrapper"
)
COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0"))
fi
--- a/src/CLIParser.h Thu Jan 02 23:31:44 2020 +0100
+++ b/src/CLIParser.h Sun Jan 05 01:01:12 2020 +0100
@@ -68,6 +68,9 @@
static const string_t OPTION_RECORDS;
static const string_t OPTION_ATTRIBUTE;
static const string_t OPTION_XINCLUDE;
+ static const string_t OPTION_MODE;
+ static const string_t OPTION_RAW_XML_NODELIST_WRAPPER;
+ static const string_t OPTION_RAW_XML_ATTRIBUTE_WRAPPER;
Configuration parse(const std::vector<string_t>& arguments) {
Configuration c;
@@ -90,10 +93,32 @@
currentRelation.xpath = readNext(arguments, i);
} else if (option == OPTION_ATTRIBUTE) {
AttributeRecipe attribute;
+ attribute.mode = currentRelation.mode;
+ attribute.rawXmlNodeListWrapper = currentRelation.rawXmlNodeListWrapper;
+ attribute.rawXmlAttributeWrapper = currentRelation.rawXmlAttributeWrapper;
attribute.name = readNext(arguments, i);
attribute.type = parseTypeId(readNext(arguments, i));
attribute.xpath = readNext(arguments, i);
currentRelation.attributes.push_back(attribute);
+ } else if (option == OPTION_MODE) {
+ string_t modeName = readNext(arguments, i);
+ Mode mode;
+ if (modeName == L"string") mode = Mode::STRING;
+ else if (modeName == L"boolean") mode = Mode::BOOLEAN;
+ else if (modeName == L"raw-xml") mode = Mode::RAW_XML;
+ else if (modeName == L"line-number") mode = Mode::LINE_NUMBER;
+ else if (modeName == L"xpath") mode = Mode::XPATH;
+ else throw relpipe::cli::RelpipeCLIException(L"Unsupported mode: " + modeName, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
+ if (currentRelation.attributes.size()) currentRelation.attributes.back().mode = mode;
+ else currentRelation.mode = mode;
+ } else if (option == OPTION_RAW_XML_NODELIST_WRAPPER) {
+ XmlElementSkeleton w = {readNext(arguments, i), readNext(arguments, i), readNext(arguments, i)};
+ if (currentRelation.attributes.size()) currentRelation.attributes.back().rawXmlNodeListWrapper = w;
+ else currentRelation.rawXmlNodeListWrapper = w;
+ } else if (option == OPTION_RAW_XML_ATTRIBUTE_WRAPPER) {
+ XmlElementSkeleton w = {readNext(arguments, i), readNext(arguments, i), readNext(arguments, i)};
+ if (currentRelation.attributes.size()) currentRelation.attributes.back().rawXmlAttributeWrapper = w;
+ else currentRelation.rawXmlAttributeWrapper = w;
} else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
}
addRelation(c, currentRelation); // last relation
@@ -111,6 +136,9 @@
const string_t CLIParser::OPTION_RECORDS = L"--records";
const string_t CLIParser::OPTION_ATTRIBUTE = L"--attribute";
const string_t CLIParser::OPTION_XINCLUDE = L"--xinclude";
+const string_t CLIParser::OPTION_MODE = L"--mode";
+const string_t CLIParser::OPTION_RAW_XML_NODELIST_WRAPPER = L"--raw-xml-nodelist-wrapper";
+const string_t CLIParser::OPTION_RAW_XML_ATTRIBUTE_WRAPPER = L"--raw-xml-attribute-wrapper";
}
}
--- a/src/Configuration.h Thu Jan 02 23:31:44 2020 +0100
+++ b/src/Configuration.h Sun Jan 05 01:01:12 2020 +0100
@@ -25,6 +25,31 @@
namespace in {
namespace xmltable {
+enum class Mode { // How the value of an attribute is obtained from the result of its XPath expression (chosen by the --mode CLI option).
+ STRING, // the XPath is evaluated to a string (eval_to_string)
+ BOOLEAN, // the XPath is evaluated to a boolean (eval_to_boolean) and written as "true" or "false"
+ // TODO: support also XML number, when we have a rational or decimal numbers in Relational pipes
+ RAW_XML, // the matched node(s) are serialized as XML text, optionally inside wrapper elements
+ LINE_NUMBER, // get_line() of the first matched node; empty string when nothing matched
+ XPATH // get_path() of the first matched node; empty string when nothing matched
+};
+
+class XmlElementSkeleton { // Description of an XML element (without content) that is used as a wrapper in the raw-xml mode.
+public:
+ relpipe::writer::string_t name; // element name; an empty name in the node-list wrapper means that no wrapper is created
+ relpipe::writer::string_t uri; // passed as the namespace URI when the wrapper element is created
+ relpipe::writer::string_t prefix; // passed as the namespace prefix when the wrapper element is created
+
+ XmlElementSkeleton() { // all three fields stay empty
+ }
+
+ XmlElementSkeleton(relpipe::writer::string_t name, relpipe::writer::string_t uri = L"", relpipe::writer::string_t prefix = L"") : name(name), uri(uri), prefix(prefix) { // intentionally not explicit: used for brace-initialization like = {L"attribute"}
+ }
+
+ virtual ~XmlElementSkeleton() {
+ }
+};
+
class AttributeRecipe {
public:
@@ -34,6 +59,10 @@
relpipe::writer::string_t name;
relpipe::writer::TypeId type;
relpipe::writer::string_t xpath;
+ Mode mode = Mode::STRING;
+ XmlElementSkeleton rawXmlNodeListWrapper;
+ XmlElementSkeleton rawXmlAttributeWrapper;
+
};
class RelationConfiguration {
@@ -46,7 +75,11 @@
relpipe::writer::boolean_t nameIsXPath = false;
relpipe::writer::string_t xpath;
std::vector<AttributeRecipe> attributes;
-
+
+ // Defaults/templates for AttributeRecipe:
+ Mode mode = Mode::STRING;
+ XmlElementSkeleton rawXmlNodeListWrapper;
+ XmlElementSkeleton rawXmlAttributeWrapper = {L"attribute"};
};
class Configuration {
--- a/src/XMLTableCommand.h Thu Jan 02 23:31:44 2020 +0100
+++ b/src/XMLTableCommand.h Sun Jan 05 01:01:12 2020 +0100
@@ -22,6 +22,8 @@
#include <sstream>
#include <vector>
#include <algorithm>
+#include <exception>
+#include <regex>
#include <libxml++-2.6/libxml++/libxml++.h>
@@ -39,6 +41,45 @@
private:
std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
+ string_t formatRawXML(const string_t& rawXML) { // Returns rawXML without the leading UTF-8 XML declaration (including its newline) and without the trailing newline.
+     static const std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); // compiled once instead of on every call
+     return std::regex_replace(rawXML, pattern, L"");
+ }
+
+ void importNode(xmlpp::Node* parent, xmlpp::Node* child, const AttributeRecipe& attributeRecipe) { // Imports child under parent; the recipe is taken by const reference so it is not copied for every node.
+     const auto& wrapper = attributeRecipe.rawXmlAttributeWrapper;
+     if (dynamic_cast<xmlpp::AttributeNode*> (child)) parent->add_child_with_new_ns( // an attribute node is imported into a newly added wrapper element
+             convertor.to_bytes(wrapper.name),
+             convertor.to_bytes(wrapper.uri),
+             convertor.to_bytes(wrapper.prefix))->import_node(child);
+     else parent->import_node(child, true); // any other node is imported directly under parent
+ }
+
+ void importNode(xmlpp::Document* document, xmlpp::Node* child, const AttributeRecipe& attributeRecipe) { // Makes child the content of the document root; the recipe is taken by const reference to avoid a copy per call.
+     const auto& wrapper = attributeRecipe.rawXmlAttributeWrapper;
+     if (dynamic_cast<xmlpp::AttributeNode*> (child)) document->create_root_node( // an attribute node is imported into a wrapper element created as the root
+             convertor.to_bytes(wrapper.name),
+             convertor.to_bytes(wrapper.uri),
+             convertor.to_bytes(wrapper.prefix))->import_node(child);
+     else document->create_root_node_by_import(child, true); // any other node becomes the root node itself
+ }
+
+ string_t toRawXML(xmlpp::Node* parent, const AttributeRecipe& attributeRecipe, const xmlpp::Node::PrefixNsMap& ns) { // Serializes the nodes matched by attributeRecipe.xpath (relative to parent) as XML text; throws std::invalid_argument if several nodes match and no node-list wrapper is configured.
+     const auto& listWrapper = attributeRecipe.rawXmlNodeListWrapper; // recipe and namespace map are borrowed (const reference), not copied per call
+     xmlpp::Document d;
+     xmlpp::NodeSet nodes = parent->find(convertor.to_bytes(attributeRecipe.xpath), ns);
+     if (listWrapper.name.size()) { // wrapper configured: all matched nodes (possibly none) are put inside it
+         xmlpp::Node* wrapperElement = d.create_root_node(
+                 convertor.to_bytes(listWrapper.name),
+                 convertor.to_bytes(listWrapper.uri),
+                 convertor.to_bytes(listWrapper.prefix));
+         for (xmlpp::Node* node : nodes) importNode(wrapperElement, node, attributeRecipe);
+     } else { // no wrapper: exactly one node can be serialized
+         if (nodes.size() == 1) importNode(&d, nodes[0], attributeRecipe);
+         else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception
+         else return L""; // TODO: null
+     }
+     return formatRawXML(convertor.from_bytes(d.write_to_string()));
+ }
+
public:
void process(std::istream& input, std::ostream& output, Configuration& configuration) {
@@ -64,7 +105,24 @@
for (xmlpp::Node* n : root->find(convertor.to_bytes(r.xpath), ns)) {
for (AttributeRecipe a : r.attributes) {
// TODO: convert to bytes only once
- writer->writeAttribute(convertor.from_bytes(n->eval_to_string(convertor.to_bytes(a.xpath), ns)));
+ std::string attributeXpath = convertor.to_bytes(a.xpath);
+ if (a.mode == Mode::STRING) {
+ writer->writeAttribute(convertor.from_bytes(n->eval_to_string(attributeXpath, ns)));
+ } else if (a.mode == Mode::BOOLEAN) {
+ writer->writeAttribute(n->eval_to_boolean(attributeXpath, ns) ? L"true" : L"false");
+ } else if (a.mode == Mode::LINE_NUMBER) {
+ xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns);
+ string_t line = attributeNodes.size() ? std::to_wstring(attributeNodes[0]->get_line()) : L""; // TODO: null
+ writer->writeAttribute(line);
+ } else if (a.mode == Mode::XPATH) {
+ xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns);
+ string_t line = attributeNodes.size() ? convertor.from_bytes(attributeNodes[0]->get_path()) : L""; // TODO: null
+ writer->writeAttribute(line);
+ } else if (a.mode == Mode::RAW_XML) {
+ writer->writeAttribute(toRawXML(n, a, ns));
+ } else {
+ throw logic_error("Unsupported mode."); // should never happen, TODO: better relpipe exception
+ }
}
}
}