--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/pdfinfo Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides PDF metadata of the given files. It calls the pdfinfo tool.
+# With no options it returns just the number of pages (or 0 if the file is not a PDF).
+# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'; the option 'prefix' sets a prefix for the generated attribute names.
+# The list of available attributes can be obtained by calling the pdfinfo command directly on a PDF file.
+
+
+. "$(dirname $0)/streamlet-common.sh"
+
+# Reads the options 'attribute' (repeatable) and 'prefix' into the globals pdfFields and pdfPrefix,
+# declares one output attribute per selected field (default: Pages) and requests the input attributes.
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+  local i type
+  pdfFields=()
+  for (( i = 0; i < ${#optionNames[@]}; i++ )); do
+    case "${optionNames[$i]}" in
+      attribute) pdfFields+=("${optionValues[$i]}") ;;
+      prefix) pdfPrefix="${optionValues[$i]}" ;;
+      *) echo "Unsupported option: ${optionNames[$i]}" >&2 ;;
+    esac
+  done
+
+  # Test the element count: "$pdfFields" is only element 0, so an empty first name would discard the rest.
+  if (( ${#pdfFields[@]} == 0 )); then pdfFields=( "Pages" ); fi
+
+  for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
+    # "Pages" is declared as integer, every other field as string.
+    if [[ "${pdfFields[$i]}" == "Pages" ]]; then type="integer"; else type="string"; fi
+    # outputAttributeAliases[i], when set, replaces the generated name (prefix + field name).
+    send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$type"
+  done
+  send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+# Sends one OUTPUT_ATTRIBUTE (value, isNull) per entry of pdfFields for $currentFile, then requests the
+# next input. Values come from `pdfinfo -isodates`; a field (except Pages) absent from a valid PDF is "".
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+  local pdfInfo="" pdfValid=1 value isNull i
+  # A directory is never passed to pdfinfo; it must count as invalid rather than as a successful run.
+  if [[ ! -d "$currentFile" ]]; then pdfInfo="$(pdfinfo -isodates "$currentFile")"; pdfValid="$?"; fi
+
+  for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
+    # Take the text after "Name:" and its padding. \Q…\E quotes the name, so it must not contain "\E".
+    value="$(printf '%s\n' "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/' | tr -d '\n')"
+    # Pages always needs a number: without one it is a null 0. Other fields are null only if pdfinfo failed.
+    if [[ "${pdfFields[$i]}" == "Pages" && ( "$pdfValid" != "0" || -z "$value" ) ]]; then value="0"; isNull="true"
+    elif [[ "$pdfValid" != "0" ]]; then value=""; isNull="true"
+    else isNull="false"
+    fi
+    send OUTPUT_ATTRIBUTE "$value" "$isNull"
+  done
+  send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+initialize # not defined in this file; presumably provided by the sourced streamlet-common.sh (TODO confirm)
+processMessages # not defined here either; presumably dispatches to the processMessage_* handlers above (TODO confirm)