# HG changeset patch
# User František Kučera
# Date 1578182472 -3600
# Node ID 8730e2d0db0e866da5327d6a73a9c82ec23a96b0
# Parent  ff69af3c67a30b5c6ebc13cc76ca4855c6eba768
support multiple modes of reading from XML: string, boolean, raw-xml, line-number, xpath

diff -r ff69af3c67a3 -r 8730e2d0db0e bash-completion.sh
--- a/bash-completion.sh	Thu Jan 02 23:31:44 2020 +0100
+++ b/bash-completion.sh	Sun Jan 05 01:01:12 2020 +0100
@@ -33,6 +33,14 @@
 		"false"
 	)
 
+	MODE=(
+		"string"
+		"boolean"
+		"raw-xml"
+		"line-number"
+		"xpath"
+	)
+
 	# FIXME: user must type " and then press TAB otherwise the completion is broken due to the : colon
 	#
 	# can be fixed by global modification of environment variable:
@@ -51,15 +59,22 @@
 	)
 
 
-	if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''")
-	elif [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''")
-	elif [[ "$w1" == "--records" && "x$w0" == "x" ]]; then COMPREPLY=("'/'")
-	elif [[ "$w1" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
-	elif [[ "$w2" == "--attribute" ]]; then COMPREPLY=($(compgen -W "${DATA_TYPE[*]}" -- "$w0"))
-	elif [[ "$w3" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
-	elif [[ "$w1" == "--namespace" && "x$w0" == "x" ]]; then COMPREPLY=("''")
-	elif [[ "$w2" == "--namespace" ]]; then COMPREPLY=($(compgen -W "${XMLNS[*]}" -- "$w0"))
-	elif [[ "$w1" == "--xinclude" ]]; then COMPREPLY=($(compgen -W "${XINCLUDE[*]}" -- "$w0"))
+	if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+	elif [[ "$w1" == "--records" && "x$w0" == "x" ]]; then COMPREPLY=("'/'")
+	elif [[ "$w1" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+	elif [[ "$w2" == "--attribute" ]]; then COMPREPLY=($(compgen -W "${DATA_TYPE[*]}" -- "$w0"))
+	elif [[ "$w3" == "--attribute" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+	elif [[ "$w1" == "--namespace" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+	elif [[ "$w2" == "--namespace" ]]; then COMPREPLY=($(compgen -W "${XMLNS[*]}" -- "$w0"))
+	elif [[ "$w1" == "--xinclude" ]]; then COMPREPLY=($(compgen -W "${XINCLUDE[*]}" -- "$w0"))
+	elif [[ "$w1" == "--mode" ]]; then COMPREPLY=($(compgen -W "${MODE[*]}" -- "$w0"))
+	elif [[ "$w1" == "--raw-xml-nodelist-wrapper" ]]; then COMPREPLY=("'xml'")
+	elif [[ "$w2" == "--raw-xml-nodelist-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+	elif [[ "$w3" == "--raw-xml-nodelist-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+
+	elif [[ "$w1" == "--raw-xml-attribute-wrapper" ]]; then COMPREPLY=("'attribute'")
+	elif [[ "$w2" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
+	elif [[ "$w3" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
 	else
 		OPTIONS=(
 			"--namespace"
@@ -68,6 +83,9 @@
 			"--name-is-xpath"
 			"--attribute"
 			"--xinclude"
+			"--mode"
+			"--raw-xml-nodelist-wrapper"
+			"--raw-xml-attribute-wrapper"
 		)
 		COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0"))
 	fi
diff -r ff69af3c67a3 -r 8730e2d0db0e src/CLIParser.h
--- a/src/CLIParser.h	Thu Jan 02 23:31:44 2020 +0100
+++ b/src/CLIParser.h	Sun Jan 05 01:01:12 2020 +0100
@@ -68,6 +68,9 @@
 	static const string_t OPTION_RECORDS;
 	static const string_t OPTION_ATTRIBUTE;
 	static const string_t OPTION_XINCLUDE;
+	static const string_t OPTION_MODE;
+	static const string_t OPTION_RAW_XML_NODELIST_WRAPPER;
+	static const string_t OPTION_RAW_XML_ATTRIBUTE_WRAPPER;
 
 	Configuration parse(const std::vector& arguments) {
 		Configuration c;
@@ -90,10 +93,32 @@
 				currentRelation.xpath = readNext(arguments, i);
 			} else if (option == OPTION_ATTRIBUTE) {
 				AttributeRecipe attribute;
+				attribute.mode = currentRelation.mode;
+				attribute.rawXmlNodeListWrapper = currentRelation.rawXmlNodeListWrapper;
+				attribute.rawXmlAttributeWrapper = currentRelation.rawXmlAttributeWrapper;
 				attribute.name = readNext(arguments, i);
 				attribute.type = parseTypeId(readNext(arguments, i));
 				attribute.xpath = readNext(arguments, i);
 				currentRelation.attributes.push_back(attribute);
+			} else if (option == OPTION_MODE) {
+				string_t modeName = readNext(arguments, i);
+				Mode mode;
+				if (modeName == L"string") mode = Mode::STRING;
+				else if (modeName == L"boolean") mode = Mode::BOOLEAN;
+				else if (modeName == L"raw-xml") mode = Mode::RAW_XML;
+				else if (modeName == L"line-number") mode = Mode::LINE_NUMBER;
+				else if (modeName == L"xpath") mode = Mode::XPATH;
+				else throw relpipe::cli::RelpipeCLIException(L"Unsupported mode: " + modeName, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
+				if (currentRelation.attributes.size()) currentRelation.attributes.back().mode = mode;
+				else currentRelation.mode = mode;
+			} else if (option == OPTION_RAW_XML_NODELIST_WRAPPER) {
+				XmlElementSkeleton w = {readNext(arguments, i), readNext(arguments, i), readNext(arguments, i)};
+				if (currentRelation.attributes.size()) currentRelation.attributes.back().rawXmlNodeListWrapper = w;
+				else currentRelation.rawXmlNodeListWrapper = w;
+			} else if (option == OPTION_RAW_XML_ATTRIBUTE_WRAPPER) {
+				XmlElementSkeleton w = {readNext(arguments, i), readNext(arguments, i), readNext(arguments, i)};
+				if (currentRelation.attributes.size()) currentRelation.attributes.back().rawXmlAttributeWrapper = w;
+				else currentRelation.rawXmlAttributeWrapper = w;
 			} else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
 		}
 		addRelation(c, currentRelation); // last relation
@@ -111,6 +136,9 @@
 const string_t CLIParser::OPTION_RECORDS = L"--records";
 const string_t CLIParser::OPTION_ATTRIBUTE = L"--attribute";
 const string_t CLIParser::OPTION_XINCLUDE = L"--xinclude";
+const string_t CLIParser::OPTION_MODE = L"--mode";
+const string_t CLIParser::OPTION_RAW_XML_NODELIST_WRAPPER = L"--raw-xml-nodelist-wrapper";
+const string_t CLIParser::OPTION_RAW_XML_ATTRIBUTE_WRAPPER = L"--raw-xml-attribute-wrapper";
 
 }
 }
diff -r ff69af3c67a3 -r 8730e2d0db0e src/Configuration.h
--- a/src/Configuration.h	Thu Jan 02 23:31:44 2020 +0100
+++ b/src/Configuration.h	Sun Jan 05 01:01:12 2020 +0100
@@ -25,6 +25,31 @@
 namespace in {
 namespace xmltable {
 
+enum class Mode {
+	STRING,
+	BOOLEAN,
+	// TODO: support also XML number, when we have a rational or decimal numbers in Relational pipes
+	RAW_XML,
+	LINE_NUMBER,
+	XPATH
+};
+
+class XmlElementSkeleton {
+public:
+	relpipe::writer::string_t name;
+	relpipe::writer::string_t uri;
+	relpipe::writer::string_t prefix;
+
+	XmlElementSkeleton() {
+	}
+
+	XmlElementSkeleton(relpipe::writer::string_t name, relpipe::writer::string_t uri = L"", relpipe::writer::string_t prefix = L"") : name(name), uri(uri), prefix(prefix) {
+	}
+
+	virtual ~XmlElementSkeleton() {
+	}
+};
+
 class AttributeRecipe {
 public:
 
@@ -34,6 +59,10 @@
 	relpipe::writer::string_t name;
 	relpipe::writer::TypeId type;
 	relpipe::writer::string_t xpath;
+	Mode mode = Mode::STRING;
+	XmlElementSkeleton rawXmlNodeListWrapper;
+	XmlElementSkeleton rawXmlAttributeWrapper;
+
 };
 
 class RelationConfiguration {
@@ -46,7 +75,11 @@
 	relpipe::writer::boolean_t nameIsXPath = false;
 	relpipe::writer::string_t xpath;
 	std::vector attributes;
-	
+
+	// Defaults/templates for AttributeRecipe:
+	Mode mode = Mode::STRING;
+	XmlElementSkeleton rawXmlNodeListWrapper;
+	XmlElementSkeleton rawXmlAttributeWrapper = {L"attribute"};
 };
 
 class Configuration {
diff -r ff69af3c67a3 -r 8730e2d0db0e src/XMLTableCommand.h
--- a/src/XMLTableCommand.h	Thu Jan 02 23:31:44 2020 +0100
+++ b/src/XMLTableCommand.h	Sun Jan 05 01:01:12 2020 +0100
@@ -22,6 +22,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include
 
@@ -39,6 +41,45 @@
 private:
 	std::wstring_convert> convertor; // TODO: support also other encodings.
 
+	string_t formatRawXML(string_t rawXML) {
+		std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$");
+		return std::regex_replace(rawXML, pattern, L"");
+	}
+
+	void importNode(xmlpp::Node* parent, xmlpp::Node* child, AttributeRecipe attributeRecipe) {
+		if (dynamic_cast (child)) parent->add_child_with_new_ns(
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name),
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri),
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child);
+		else parent->import_node(child, true);
+	}
+
+	void importNode(xmlpp::Document* document, xmlpp::Node* child, AttributeRecipe attributeRecipe) {
+		if (dynamic_cast (child)) document->create_root_node(
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.name),
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.uri),
+				convertor.to_bytes(attributeRecipe.rawXmlAttributeWrapper.prefix))->import_node(child);
+		else document->create_root_node_by_import(child, true);
+	}
+
+	string_t toRawXML(xmlpp::Node* parent, AttributeRecipe attributeRecipe, xmlpp::Node::PrefixNsMap ns) {
+		xmlpp::Document d;
+		xmlpp::NodeSet nodes = parent->find(convertor.to_bytes(attributeRecipe.xpath), ns);
+
+		if (attributeRecipe.rawXmlNodeListWrapper.name.size()) {
+			d.create_root_node(
+					convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.name),
+					convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.uri),
+					convertor.to_bytes(attributeRecipe.rawXmlNodeListWrapper.prefix));
+			for (xmlpp::Node* node : nodes) importNode(d.get_root_node(), node, attributeRecipe);
+		} else {
+			if (nodes.size() == 1) importNode(&d, nodes[0], attributeRecipe);
+			else if (nodes.size() > 1) throw std::invalid_argument("Multiple nodes found where only one was expected. Use nodelist wrapper."); // TODO: better relpipe exception
+			else return L""; // TODO: null
+		}
+		return formatRawXML(convertor.from_bytes(d.write_to_string()));
+	}
+
 public:
 
 	void process(std::istream& input, std::ostream& output, Configuration& configuration) {
@@ -64,7 +105,24 @@
 			for (xmlpp::Node* n : root->find(convertor.to_bytes(r.xpath), ns)) {
 				for (AttributeRecipe a : r.attributes) {
 					// TODO: convert to bytes only once
-					writer->writeAttribute(convertor.from_bytes(n->eval_to_string(convertor.to_bytes(a.xpath), ns)));
+					std::string attributeXpath = convertor.to_bytes(a.xpath);
+					if (a.mode == Mode::STRING) {
+						writer->writeAttribute(convertor.from_bytes(n->eval_to_string(attributeXpath, ns)));
+					} else if (a.mode == Mode::BOOLEAN) {
+						writer->writeAttribute(n->eval_to_boolean(attributeXpath, ns) ? L"true" : L"false");
+					} else if (a.mode == Mode::LINE_NUMBER) {
+						xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns);
+						string_t line = attributeNodes.size() ? std::to_wstring(attributeNodes[0]->get_line()) : L""; // TODO: null
+						writer->writeAttribute(line);
+					} else if (a.mode == Mode::XPATH) {
+						xmlpp::NodeSet attributeNodes = n->find(attributeXpath, ns);
+						string_t line = attributeNodes.size() ? convertor.from_bytes(attributeNodes[0]->get_path()) : L""; // TODO: null
+						writer->writeAttribute(line);
+					} else if (a.mode == Mode::RAW_XML) {
+						writer->writeAttribute(toRawXML(n, a, ns));
+					} else {
+						throw logic_error("Unsupported mode."); // should never happen, TODO: better relpipe exception
+					}
 				}
 			}
 		}