/**
* Relational pipes
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "streamlet-common.h"
#include <unistd.h>
#include <regex>
#include <libxml++-2.6/libxml++/libxml++.h>
/**
* This streamlet provides values from XML files.
* It uses the XPath language to define which portion of the XML should be returned.
*
* With no options it does not provide any attributes.
*
* XPath expressions are passed as 'attribute' options.
* e.g. --option 'attribute' 'name()' will return a single attribute containing the name of the root node.
*
* Attributes can be renamed using aliases: --option 'attribute' 'name()' --as 'name'. Otherwise the full XPath expression is used as a name.
* Number of aliases should match the number of attributes (otherwise only first attributes are renamed, because aliases are global, not relative to the --option).
*
* Like relpipe-in-xmltable, this streamlet supports several modes:
* - string
* - boolean
* - raw-xml
* - line-number
* - xpath
*
* TODO: more OOP, move to separate repository, proper CMake project, clean-up, stabilize API
*/
class XPathStreamlet : public Streamlet {
private:
xmlpp::Node::PrefixNsMap ns;
void findXmlnsInEnvironment() {
std::regex xmlnsEnvPattern("xmlns_(.*)=(.*)");
std::cmatch match;
for (char **env = environ; *env; env++) if (std::regex_match(*env, match, xmlnsEnvPattern)) ns[std::string(match[1])] = match[2];
}
void findXmlnsInOptions() {
for (Option o : getOptions(std::wregex(L"xmlns[:_](.*)"))) ns[toBytes(o.nameMatch[1])] = toBytes(o.value);
for (Option o : getOptions(std::wregex(L"xmlns"), std::wregex(L"([^:]+):(.*)"))) ns[toBytes(o.valueMatch[1])] = toBytes(o.valueMatch[2]);
}
// Modes should share the logic of relpipe-in-xmltable
enum class Mode {
STRING,
BOOLEAN,
// TODO: support also XML number, when we have a rational or decimal numbers in Relational pipes
RAW_XML,
LINE_NUMBER,
XPATH
};
Mode toMode(std::wstring modeName) {
if (modeName == L"string") return Mode::STRING;
else if (modeName == L"boolean") return Mode::BOOLEAN;
else if (modeName == L"raw-xml") return Mode::RAW_XML;
else if (modeName == L"line-number") return Mode::LINE_NUMBER;
else if (modeName == L"xpath") return Mode::XPATH;
else throw std::invalid_argument("Unsupported mode: " + toBytes(modeName));
}
std::wstring toType(Mode mode) {
if (mode == Mode::BOOLEAN) return BOOLEAN;
else if (mode == Mode::LINE_NUMBER) return INTEGER;
else return STRING;
}
class XPathAttribute {
public:
std::wstring name;
std::wstring xpath;
Mode mode = Mode::STRING;
};
std::vector<XPathAttribute> xpathAttributes;
protected:
std::vector<AttributeMetadata> getOutputAttributesMetadata() override {
findXmlnsInEnvironment();
findXmlnsInOptions();
std::vector<AttributeMetadata> oam;
std::vector<Option> modeOptions = getOptions(L"mode");
std::vector<Option> attributeOptions = getOptions(L"attribute");
for (int i = 0, limit = attributeOptions.size(); i < limit; i++) {
Mode mode = i < modeOptions.size() ? toMode(modeOptions[i].value) : Mode::STRING;
std::wstring alias = getAlias(i, attributeOptions[i].value);
xpathAttributes.push_back({alias, attributeOptions[i].value, mode});
oam.push_back({alias, toType(mode)});
}
return oam;
}
std::vector<OutputAttribute> getOutputAttributes() override {
std::vector<OutputAttribute> oa;
try {
xmlpp::DomParser parser;
parser.parse_file(toBytes(getCurrentFile()));
xmlpp::Element* root = parser.get_document()->get_root_node();
for (XPathAttribute xpathAttribute : xpathAttributes) {
std::string xpath = toBytes(xpathAttribute.xpath);
std::wstring result;
bool isNull = false;
if (xpathAttribute.mode == Mode::STRING) {
result = fromBytes(root->eval_to_string(xpath, ns));
} else if (xpathAttribute.mode == Mode::BOOLEAN) {
result = root->eval_to_boolean(xpath, ns) ? L"true" : L"false";
} else if (xpathAttribute.mode == Mode::LINE_NUMBER) {
xmlpp::NodeSet attributeNodes = root->find(xpath, ns);
if (attributeNodes.size()) result = std::to_wstring(attributeNodes[0]->get_line());
else isNull = true;
} else if (xpathAttribute.mode == Mode::XPATH) {
xmlpp::NodeSet attributeNodes = root->find(xpath, ns);
if (attributeNodes.size()) result = fromBytes(attributeNodes[0]->get_path());
else isNull = true;
} else if (xpathAttribute.mode == Mode::RAW_XML) {
throw std::logic_error("Raw XML mode is not yet implemented."); // TODO: implement also RAW_XML
} else {
throw std::logic_error("Unsupported mode."); // should never happer
}
oa.push_back({result, isNull});
}
} catch (xmlpp::parse_error& e) {
for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({L"", true});
// invalid XML → xmlpp::parse_error → just skip this file
// invalid XPath → xmlpp::exception → failure
}
return oa;
}
};
// STREAMLET_RUN is not defined in this file; presumably it comes from streamlet-common.h
// and generates the program entry point that runs the given streamlet class — TODO confirm.
STREAMLET_RUN(XPathStreamlet)