diff -r afbce5b8aabf -r f1bbcf616269 streamlet-examples/__relpipe_in_filesystem_script_pdftotext
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/__relpipe_in_filesystem_script_pdftotext Fri Jan 17 13:02:27 2020 +0100
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+# This streamlet provides a single attribute: the plain-text content of the given PDF file.
+# It obtains the text by calling the pdftotext tool.
+# N.B. the plain-text content must fit into memory, a shell variable and a command-line argument (it usually will).
+
+
+# Source streamlet-common.sh from the directory of this script (presumably it defines
+# send, initialize and processMessages, which are not defined in this file).
+# "$0" is quoted so that a script path containing whitespace still works.
+. "$(dirname "$0")/streamlet-common.sh"
+
+# Sends the metadata of the single output attribute of this streamlet:
+# its name is outputAttributeAliases[0] if that is set, otherwise "pdftotext";
+# its type is "string". Then signals that it waits for input attributes.
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+  send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-pdftotext}" "string"
+  send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+# Sends the text that pdftotext extracts from "$currentFile" as the attribute value.
+# If the extraction fails, an empty value flagged as null is sent instead.
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+  # pipefail is enabled only inside the substitution subshell: it makes a failing
+  # pdftotext visible. Without it the status tested here would be that of tr,
+  # the last command of the pipeline, and the null branch would never be taken.
+  # tr just removes the page breaks (form feeds).
+  if value="$(set -o pipefail; pdftotext "$currentFile" - | tr -d '\f')"; then
+    isNull="false"
+  else
+    value=""
+    isNull="true"
+  fi
+  send OUTPUT_ATTRIBUTE "$value" "$isNull"
+  send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+initialize
+processMessages