streamlet-examples/pdfinfo
branchv_0
changeset 45 f466b4c7d9b1
parent 39 225da9aa9c94
child 49 ab48ad4ecb91
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/pdfinfo	Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides PDF metadata of the given files. It calls the pdfinfo tool.
+# With no options it returns just the number of pages (or 0 if the file is not a PDF).
+# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
+# The list of available attributes can be obtained by calling the pdfinfo command directly on a PDF file.
+
+
+. "$(dirname -- "$0")/streamlet-common.sh"  # "$0" is quoted so a script path containing spaces or globs still resolves
+
+# Handler: turns the "attribute"/"prefix" options into the list of PDF fields and announces one output attribute per field.
+# Globals: optionNames, optionValues, outputAttributeAliases (read); pdfFields, pdfPrefix (written).
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+	local i type;  # the loop counter must not leak into the global scope
+	pdfFields=(); pdfPrefix="";  # reset both so no stale or inherited value is used
+	for (( i=0; i<${#optionNames[@]}; i++)); do
+		case "${optionNames[$i]}" in
+			attribute) pdfFields+=("${optionValues[$i]}");;
+			prefix)    pdfPrefix="${optionValues[$i]}";;
+			*)         echo "Unsupported option: ${optionNames[$i]}" >&2;;
+		esac
+	done
+	# Test the element count: "$pdfFields" would only inspect the first element.
+	if (( ${#pdfFields[@]} == 0 )); then
+		pdfFields=( "Pages" );  # default: just the number of pages
+	fi
+
+	for (( i=0; i<${#pdfFields[@]}; i++)); do
+		# Name: the alias if one is set, else prefix + field name. Type: integer for Pages, string otherwise.
+		if [[ "${pdfFields[$i]}" == "Pages" ]]; then type="integer"; else type="string"; fi
+		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}"    "$type"
+	done
+	send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+# Handler: emits one OUTPUT_ATTRIBUTE (value + null flag) per selected PDF field for the file named by $currentFile.
+# Globals: currentFile, pdfFields (read). Without usable data, Pages falls back to 0 and other fields to an empty null value.
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+	local i pdfInfo="" pdfValid=1 value isNull;
+
+	# Directories are never handed to pdfinfo and count as invalid, exactly like any other non-PDF input.
+	if [[ ! -d "$currentFile" ]]; then pdfInfo="$(pdfinfo -isodates "$currentFile")"; pdfValid="$?"; fi
+
+	for (( i=0; i<${#pdfFields[@]}; i++)); do
+		value="$(printf '%s\n' "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E"
+
+		if   [[ "${pdfFields[$i]}" == "Pages" && ( "$pdfValid" != "0" || -z "$value" ) ]]; then value="0"; isNull="true";
+		elif [[ "$pdfValid" != "0" ]];                                                      then value="";  isNull="true";
+		else                                                                                     isNull="false";
+		fi
+		send OUTPUT_ATTRIBUTE "$value"    "$isNull";
+	done
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize        # defined outside this file (presumably in streamlet-common.sh) — TODO confirm
+processMessages   # presumably dispatches to the processMessage_* handlers above — TODO confirm