# HG changeset patch # User František Kučera # Date 1580255917 -3600 # Node ID 018e2609f5bbdca6f08b6bae6d74f0fba2530c4d # Parent 52f837fbb216de3464f8815885ad053d9adcd957 streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder diff -r 52f837fbb216 -r 018e2609f5bb src/StreamletAttributeFinder.h --- a/src/StreamletAttributeFinder.h Tue Jan 28 23:35:25 2020 +0100 +++ b/src/StreamletAttributeFinder.h Wed Jan 29 00:58:37 2020 +0100 @@ -82,6 +82,16 @@ } } + void writeAttribute(RelationalWriter* writer, TypeId typeId, SubProcess::Message* m) { + if (m->parameters[1] == L"true") { + if (typeId == TypeId::BOOLEAN) writer->writeAttribute(L"false"); + else if (typeId == TypeId::INTEGER)writer->writeAttribute(L"0"); + else writer->writeAttribute(L""); // TODO: write actual null values (when supported) + } else { + writer->writeAttribute(m->parameters[0]); + } + } + protected: void startFile(const fs::path& file, const string& fileRaw, bool exists) override { @@ -98,7 +108,7 @@ if (field.group == RequestedField::GROUP_STREAMLET) { for (auto metadata : cachedMetadata[field.id]) { SubProcess::Message m = subProcesses[field.id]->read(); - if (m.code == StreamletMsg::OUTPUT_ATTRIBUTE) writer->writeAttribute(m.parameters[0]); + if (m.code == StreamletMsg::OUTPUT_ATTRIBUTE) writeAttribute(writer, metadata.typeId, &m); else throw RelpipeWriterException(L"Protocol violation from exec sub-process while reading: „" + metadata.attributeName + L"“. 
Expected OUTPUT_ATTRIBUTE but got: " + m.toString()); } diff -r 52f837fbb216 -r 018e2609f5bb streamlet-examples/cloc --- a/streamlet-examples/cloc Tue Jan 28 23:35:25 2020 +0100 +++ b/streamlet-examples/cloc Wed Jan 29 00:58:37 2020 +0100 @@ -64,9 +64,8 @@ for (( i=0; i<${#clocFields[@]}; i++)); do value="${!clocFields[$i]}"; - if [[ "x$files" == "x1" ]]; then isNull="false"; - elif [[ "x${clocFields[$i]}" == "xlanguage" ]]; then value=""; isNull="true"; - else value="0"; isNull="true"; fi + if [[ "x$files" == "x1" ]]; then isNull="false"; + else isNull="true"; fi send OUTPUT_ATTRIBUTE "$value" "$isNull"; done diff -r 52f837fbb216 -r 018e2609f5bb streamlet-examples/exiv2 --- a/streamlet-examples/exiv2 Tue Jan 28 23:35:25 2020 +0100 +++ b/streamlet-examples/exiv2 Wed Jan 29 00:58:37 2020 +0100 @@ -68,7 +68,7 @@ else isNull="false"; fi - if ( [[ "x${streamletFields[$i]}" == "xImage height" ]] || [[ "x${streamletFields[$i]}" == "xImage width" ]] ) && [[ ! "$value" =~ ^[0-9]+$ ]]; then value="0"; isNull="true"; fi + if ( [[ "x${streamletFields[$i]}" == "xImage height" ]] || [[ "x${streamletFields[$i]}" == "xImage width" ]] ) && [[ ! 
"$value" =~ ^[0-9]+$ ]]; then isNull="true"; fi send OUTPUT_ATTRIBUTE "$value" "$isNull"; done diff -r 52f837fbb216 -r 018e2609f5bb streamlet-examples/lines_count --- a/streamlet-examples/lines_count Tue Jan 28 23:35:25 2020 +0100 +++ b/streamlet-examples/lines_count Wed Jan 29 00:58:37 2020 +0100 @@ -30,7 +30,6 @@ processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { if [[ -d "$currentFile" ]]; then - value="0"; isNull="true"; else value=$(wc -l "$currentFile" | cut -d" " -f1); diff -r 52f837fbb216 -r 018e2609f5bb streamlet-examples/pdfinfo --- a/streamlet-examples/pdfinfo Tue Jan 28 23:35:25 2020 +0100 +++ b/streamlet-examples/pdfinfo Wed Jan 29 00:58:37 2020 +0100 @@ -58,9 +58,8 @@ for (( i=0; i<${#pdfFields[@]}; i++)); do value="$(echo "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E" - if ([[ ! "x$pdfValid" == "x0" ]] || [[ "x$value" == "x" ]]) && [[ "x${pdfFields[$i]}" == "xPages" ]]; then value="0"; isNull="true"; - elif [[ ! "x$pdfValid" == "x0" ]]; then value=""; isNull="true"; - else isNull="false"; + if [[ ! "x$pdfValid" == "x0" ]] || [[ "x$value" == "x" ]]; then isNull="true"; + else isNull="false"; fi send OUTPUT_ATTRIBUTE "$value" "$isNull"; diff -r 52f837fbb216 -r 018e2609f5bb streamlet-examples/xpath.cpp --- a/streamlet-examples/xpath.cpp Tue Jan 28 23:35:25 2020 +0100 +++ b/streamlet-examples/xpath.cpp Wed Jan 29 00:58:37 2020 +0100 @@ -62,13 +62,6 @@ else return STRING; } - // TODO: should not be done in particular streamlets but in the worker - std::wstring toNullValue(std::wstring type) { - if (type == BOOLEAN) return L"false"; - else if (type == INTEGER) return L"0"; - else return L""; - } - class XPathAttribute { public: @@ -118,30 +111,22 @@ result = root->eval_to_boolean(xpath, ns) ? 
L"true" : L"false"; } else if (xpathAttribute.mode == Mode::LINE_NUMBER) { xmlpp::NodeSet attributeNodes = root->find(xpath, ns); - if (attributeNodes.size()) { - result = std::to_wstring(attributeNodes[0]->get_line()); - } else { - result = L"0"; - isNull = true; - } + if (attributeNodes.size()) result = std::to_wstring(attributeNodes[0]->get_line()); + else isNull = true; } else if (xpathAttribute.mode == Mode::XPATH) { xmlpp::NodeSet attributeNodes = root->find(xpath, ns); - if (attributeNodes.size()) { - result = convertor.from_bytes(attributeNodes[0]->get_path()); - } else { - result = L""; - isNull = true; - } + if (attributeNodes.size()) result = convertor.from_bytes(attributeNodes[0]->get_path()); + else isNull = true; } else if (xpathAttribute.mode == Mode::RAW_XML) { throw std::logic_error("Raw XML mode is not yet implemented."); // TODO: implement also RAW_XML } else { throw std::logic_error("Unsupported mode."); // should never happer } - oa.push_back({result, false}); + oa.push_back({result, isNull}); } } catch (xmlpp::parse_error& e) { - for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({toNullValue(toType(xpathAttribute.mode)), true}); + for (XPathAttribute xpathAttribute : xpathAttributes) oa.push_back({L"", true}); // invalid XML → xmlpp::parse_error → just skip this file // invalid XPath → xmlpp::exception → failure }