81 |
82 |
// x2s: converts a Glib::ustring into relpipe::common::type::StringX by handing the value to convertor.from_bytes().
// NOTE(review): every line of this definition appears twice, once under each of two line-number columns
// (82-84 and 83-85); presumably a side-by-side diff rendering of a single unchanged function -- confirm.
82 const relpipe::common::type::StringX x2s(const Glib::ustring& value) { |
83 const relpipe::common::type::StringX x2s(const Glib::ustring& value) { |
83 return convertor.from_bytes(value); |
84 return convertor.from_bytes(value); |
84 } |
85 } |
85 |
86 |
|
87 bool isXmlAttribute(const relpipe::common::type::StringX& attributeName) { |
|
88 for (auto pattern : currentRelationConfiguration->xmlAttributes) if (std::regex_match(attributeName, std::wregex(pattern))) return true; |
|
89 return false; |
|
90 } |
|
91 |
|
92 const relpipe::common::type::StringX formatRawXML(const relpipe::common::type::StringX& rawXML) { |
|
93 // TODO: move to a common library (used also in relpipe-in-xmltable) |
|
94 std::wregex pattern(L"^<\\?xml version=\"1.0\" encoding=\"UTF-8\"\\?>\n|\n$"); |
|
95 return std::regex_replace(rawXML, pattern, L""); |
|
96 } |
|
97 |
|
98 const relpipe::common::type::StringX serialize(xmlpp::Element* element, bool asXml) { |
|
99 if (element) { |
|
100 if (asXml) { |
|
101 xmlpp::Document d; |
|
102 d.create_root_node_by_import(element, true); |
|
103 return formatRawXML(x2s(d.write_to_string())); |
|
104 } else { |
|
105 return element->get_child_text() ? x2s(element->get_child_text()->get_content()) : L""; |
|
106 } |
|
107 } else { |
|
108 return L""; |
|
109 } |
|
110 } |
|
111 |
|
112 xmlpp::Element* findSingleElement(const xmlpp::NodeSet& nodeset) { |
|
113 if (nodeset.empty()) return nullptr; |
|
114 else if (nodeset.size() > 1) throw std::invalid_argument("XPath should find one or zero elements."); |
|
115 else if (xmlpp::Element * element = dynamic_cast<xmlpp::Element*> (nodeset[0])) return element; |
|
116 else if (nodeset[0]->get_path() == "/") return findSingleElement(nodeset[0]->find("*")); // support also "/" not only "/*" expressions (return root element in both cases) |
|
117 else throw std::invalid_argument("XPath should find an element, not other kinds of nodes."); |
|
118 } |
|
119 |
// writeInputAttributes: walks the children of recordElement and writes one attribute to relationalWriter
// for each child that is an xmlpp::Element (other node kinds are skipped by the dynamic_cast check).
// NOTE(review): two revisions are interleaved line by line here; presumably a diff rendering -- confirm.
//   Lines numbered 86-93: write the content of the element's child text node, converted by x2s().
//   Lines numbered 120-127: look up the element's "name" attribute, ask isXmlAttribute() whether the value
//   is XML, and write serialize(...) of either the first child (cast to Element, XML mode) or the
//   attribute element itself (text mode).
// NOTE(review): get_child_text() (line 89) and get_attribute("name") (line 123) are dereferenced without a
// null check -- verify that both are always present for these elements.
86 void writeInputAttributes() { |
120 void writeInputAttributes() { |
87 for (xmlpp::Node* attributeNode : recordElement->get_children()) { |
121 for (xmlpp::Node* attributeNode : recordElement->get_children()) { |
88 if (xmlpp::Element * attributeElement = dynamic_cast<xmlpp::Element*> (attributeNode)) { |
122 if (xmlpp::Element * attributeElement = dynamic_cast<xmlpp::Element*> (attributeNode)) { |
89 auto value = attributeElement->get_child_text()->get_content(); |
123 bool asXml = isXmlAttribute(x2s(attributeElement->get_attribute("name")->get_value())); |
90 relationalWriter->writeAttribute(x2s(value)); |
124 relationalWriter->writeAttribute(serialize(asXml ? dynamic_cast<xmlpp::Element*> (attributeElement->get_first_child()) : attributeElement, asXml)); |
91 } |
125 } |
92 } |
126 } |
93 } |
127 } |
94 |
128 |
// writeOutputAttributes: for each entry of currentRelationConfiguration->outputAttributes, evaluates the
// entry's XPath expression (oa.xpath) against recordElement and writes the result to relationalWriter.
// NOTE(review): two revisions are interleaved line by line here; presumably a diff rendering -- confirm.
//   Lines numbered 95-100: always evaluate the XPath to a string (eval_to_string) and write x2s(value).
//   Lines numbered 129-136: if isXmlAttribute(oa.name) holds, find the nodes matching the XPath, reduce them
//   with findSingleElement() and write the element serialized as XML; otherwise evaluate the XPath to a
//   string as before.
// NOTE(review): `auto oa` takes each configuration entry by value, i.e. copies it per iteration.
95 void writeOutputAttributes() { |
129 void writeOutputAttributes() { |
96 for (auto oa : currentRelationConfiguration->outputAttributes) { |
130 for (auto oa : currentRelationConfiguration->outputAttributes) { |
97 auto value = recordElement->eval_to_string(s2x(oa.xpath), xmlns); |
131 relpipe::common::type::StringX value; |
98 relationalWriter->writeAttribute(x2s(value)); |
132 if (isXmlAttribute(oa.name)) value = serialize(findSingleElement(recordElement->find(s2x(oa.xpath), xmlns)), true); |

133 else value = x2s(recordElement->eval_to_string(s2x(oa.xpath), xmlns)); |

134 relationalWriter->writeAttribute(value); |
99 } |
135 } |
100 } |
136 } |
101 |
137 |
102 public: |
138 public: |
103 |
139 |
144 |
180 |
145 void attribute(const relpipe::common::type::StringX& value) override { |
181 void attribute(const relpipe::common::type::StringX& value) override { |
146 if (currentRelationConfiguration) { |
182 if (currentRelationConfiguration) { |
147 relpipe::reader::handlers::AttributeMetadata attributeMetadata = currentReaderMetadata[currentAttributeIndex]; |
183 relpipe::reader::handlers::AttributeMetadata attributeMetadata = currentReaderMetadata[currentAttributeIndex]; |
148 |
184 |
|
185 // TODO: Parallel processing of records like in relpipe-in-filesystem? Or share common code with the XPath streamlet? (streamlets are parallelized) |
|
186 |
149 xmlpp::Element* attributeElement = recordElement->add_child(xmlNameCodec.encode(s2x(attributeMetadata.getAttributeName()))); |
187 xmlpp::Element* attributeElement = recordElement->add_child(xmlNameCodec.encode(s2x(attributeMetadata.getAttributeName()))); |
150 attributeElement->set_attribute("name", s2x(attributeMetadata.getAttributeName())); |
188 attributeElement->set_attribute("name", s2x(attributeMetadata.getAttributeName())); |
151 attributeElement->set_attribute("type", s2x(attributeMetadata.getTypeName())); |
189 attributeElement->set_attribute("type", s2x(attributeMetadata.getTypeName())); |
152 attributeElement->add_child_text(s2x(value)); |
190 if (isXmlAttribute(attributeMetadata.getAttributeName())) { |
|
191 if (value.size()) { |
|
192 xmlpp::DomParser attributeParser; |
|
193 attributeParser.parse_memory(s2x(value)); |
|
194 attributeElement->import_node(attributeParser.get_document()->get_root_node(), true); |
|
195 } |
|
196 } else { |
|
197 attributeElement->add_child_text(s2x(value)); |
|
198 } |
153 |
199 |
154 if (currentAttributeIndex == 0) { |
200 if (currentAttributeIndex == 0) { |
155 recordElement->set_attribute("number", std::to_string(currentRecordNumber)); |
201 recordElement->set_attribute("number", std::to_string(currentRecordNumber)); |
156 } |
202 } |
157 |
203 |