/**
* Relational pipes
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <codecvt>
#include <regex>
#include <stdexcept>
#include <libxml++-2.6/libxml++/libxml++.h>
#include <relpipe/common/type/typedefs.h>
#include <relpipe/reader/TypeId.h>
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
#include <relpipe/reader/handlers/AttributeMetadata.h>
#include <relpipe/writer/Factory.h>
#include <relpipe/cli/RelpipeCLIException.h>
#include "Configuration.h"
#include "XMLNameCodec.h"
namespace relpipe {
namespace tr {
namespace xpath {
class XPathHandler : public relpipe::reader::handlers::RelationalReaderStringHandler {
private:
std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // XML is in UTF-8
shared_ptr<relpipe::writer::RelationalWriter> relationalWriter;
Configuration configuration;
RelationConfiguration* currentRelationConfiguration = nullptr;
std::vector<relpipe::reader::handlers::AttributeMetadata> currentReaderMetadata;
std::vector<relpipe::writer::AttributeMetadata> currentWriterMetadata;
size_t currentAttributeIndex = 0;
size_t currentRecordNumber = 1;
xmlpp::Node::PrefixNsMap xmlns;
xmlpp::Document dom;
xmlpp::Element* recordElement = nullptr;
relpipe::in::xmltable::XMLNameCodec xmlNameCodec; // TODO: move to a common library
void copyInputAttributesToOutput() {
for (auto rm : currentReaderMetadata) currentWriterMetadata.push_back({rm.getAttributeName(), relationalWriter->toTypeId(rm.getTypeName())});
}
bool isPrependingInputAttributes() {
return currentRelationConfiguration->inputAttributePolicy == InputAttributePolicy::Prepend
|| (currentRelationConfiguration->inputAttributePolicy == InputAttributePolicy::Auto && currentRelationConfiguration->outputAttributes.size() == 0);
}
bool isAppendingInputAttributes() {
return currentRelationConfiguration->inputAttributePolicy == InputAttributePolicy::Append;
}
void resetRecordElement() {
if (recordElement) dom.get_root_node()->remove_child(recordElement);
recordElement = dom.get_root_node()->add_child("record");
}
const Glib::ustring s2x(relpipe::common::type::StringX value) {
return Glib::ustring(convertor.to_bytes(value));
}
const relpipe::common::type::StringX x2s(const Glib::ustring& value) {
return convertor.from_bytes(value);
}
bool isXmlAttribute(const relpipe::common::type::StringX& attributeName) {
for (auto pattern : currentRelationConfiguration->xmlAttributes) if (std::regex_match(attributeName, std::wregex(pattern))) return true;
return false;
}
const relpipe::common::type::StringX formatRawXML(const relpipe::common::type::StringX& rawXML) {
// TODO: move to a common library (used also in relpipe-in-xmltable)
std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$");
return std::regex_replace(rawXML, pattern, L"");
}
const relpipe::common::type::StringX serialize(xmlpp::Element* element, bool asXml) {
if (element) {
if (asXml) {
xmlpp::Document d;
d.create_root_node_by_import(element, true);
return formatRawXML(x2s(d.write_to_string()));
} else {
return element->get_child_text() ? x2s(element->get_child_text()->get_content()) : L"";
}
} else {
return L"";
}
}
xmlpp::Element* findSingleElement(const xmlpp::NodeSet& nodeset) {
// TODO: Allow multiple elements and attributes and wrap them? Like in relpipe-in-xmltable --raw-xml-attribute-wrapper --raw-xml-nodelist-wrapper
if (nodeset.empty()) return nullptr;
else if (nodeset.size() > 1) throw std::invalid_argument("XPath should find one or zero elements.");
else if (xmlpp::Element * element = dynamic_cast<xmlpp::Element*> (nodeset[0])) return element;
else if (nodeset[0]->get_path() == "/") return findSingleElement(nodeset[0]->find("*")); // support also "/" not only "/*" expressions (return root element in both cases)
else throw std::invalid_argument("XPath should find an element, not other kinds of nodes.");
}
void writeInputAttributes() {
for (xmlpp::Node* attributeNode : recordElement->get_children()) {
if (xmlpp::Element * attributeElement = dynamic_cast<xmlpp::Element*> (attributeNode)) {
bool asXml = isXmlAttribute(x2s(attributeElement->get_attribute("name")->get_value()));
relationalWriter->writeAttribute(serialize(asXml ? dynamic_cast<xmlpp::Element*> (attributeElement->get_first_child()) : attributeElement, asXml));
}
}
}
void writeOutputAttributes() {
for (auto oa : currentRelationConfiguration->outputAttributes) {
relpipe::common::type::StringX value;
if (isXmlAttribute(oa.name)) value = serialize(findSingleElement(recordElement->find(s2x(oa.xpath), xmlns)), true);
else value = x2s(recordElement->eval_to_string(s2x(oa.xpath), xmlns));
relationalWriter->writeAttribute(value);
}
}
public:
XPathHandler(shared_ptr<relpipe::writer::RelationalWriter> relationalWriter, Configuration configuration) : relationalWriter(relationalWriter), configuration(configuration) {
for (int i = 0; i < configuration.namespaceMappings.size(); i++) {
std::string prefix = convertor.to_bytes(configuration.namespaceMappings[i]);
std::string uri = convertor.to_bytes(configuration.namespaceMappings[++i]);
xmlns[prefix] = uri;
}
}
virtual ~XPathHandler() {
}
void startRelation(relpipe::common::type::StringX name, std::vector<relpipe::reader::handlers::AttributeMetadata> attributes) override {
currentRelationConfiguration = nullptr;
for (int i = 0; i < configuration.relationConfigurations.size(); i++) {
if (std::regex_match(name, std::wregex(configuration.relationConfigurations[i].relation))) {
currentRelationConfiguration = &configuration.relationConfigurations[i];
break;
}
}
currentReaderMetadata = attributes;
currentWriterMetadata.clear();
currentRecordNumber = 1;
recordElement = nullptr;
if (currentRelationConfiguration == nullptr) {
copyInputAttributesToOutput();
} else {
if (isPrependingInputAttributes()) copyInputAttributesToOutput();
for (auto oa : currentRelationConfiguration->outputAttributes) currentWriterMetadata.push_back({oa.name, oa.type});
if (isAppendingInputAttributes()) copyInputAttributesToOutput();
// TODO: better metadata structure
// TODO: optional namespaces
dom.create_root_node("relpipe-tr-xpath");
dom.get_root_node()->add_child("relation-name")->add_child_text(s2x(name));
resetRecordElement();
}
relationalWriter->startRelation(name, currentWriterMetadata, true);
}
void attribute(const relpipe::common::type::StringX& value) override {
if (currentRelationConfiguration) {
relpipe::reader::handlers::AttributeMetadata attributeMetadata = currentReaderMetadata[currentAttributeIndex];
// TODO: Parallel processing of records like in relpipe-in-filesystem? Or share common code with the XPath streamlet? (streamlets are parallelized)
xmlpp::Element* attributeElement = recordElement->add_child(xmlNameCodec.encode(s2x(attributeMetadata.getAttributeName())));
attributeElement->set_attribute("name", s2x(attributeMetadata.getAttributeName()));
attributeElement->set_attribute("type", s2x(attributeMetadata.getTypeName()));
if (isXmlAttribute(attributeMetadata.getAttributeName())) {
if (value.size()) {
xmlpp::DomParser attributeParser;
attributeParser.parse_memory(s2x(value));
attributeElement->import_node(attributeParser.get_document()->get_root_node(), true);
}
} else {
// TODO: better boolean mapping? Missing text node will be evaluated as false(), however the expression still had to be "someAttribute/text()" because "someAttribute" will be evaluated as true() because the "someAttribute" element is present.
attributeElement->add_child_text(s2x(value));
}
if (currentAttributeIndex == 0) {
recordElement->set_attribute("number", std::to_string(currentRecordNumber));
}
currentAttributeIndex++;
if (currentAttributeIndex == currentReaderMetadata.size()) {
if (currentRelationConfiguration->where.empty() || recordElement->eval_to_boolean(s2x(currentRelationConfiguration->where), xmlns)) {
if (isPrependingInputAttributes()) writeInputAttributes();
writeOutputAttributes();
if (isAppendingInputAttributes()) writeInputAttributes();
}
resetRecordElement();
currentAttributeIndex = 0;
currentRecordNumber++;
}
} else {
relationalWriter->writeAttribute(value);
}
}
void endOfPipe() {
}
};
}
}
}