streamlet-examples/pdfinfo
author František Kučera <franta-hg@frantovo.cz>
Wed, 29 Jan 2020 00:58:37 +0100
branchv_0
changeset 70 018e2609f5bb
parent 50 22ed5647b235
permissions -rwxr-xr-x
streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder

#!/bin/bash

# Relational pipes
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


# This streamlet provides PDF metadata of the given files. It calls the pdfinfo tool.
# With no options it returns just the number of pages (or 0 if the file is not a PDF).
# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
# The default attribute names can be prefixed using another option – e.g. --option 'prefix' 'pdf_'
# The list of available attributes can be obtained by directly calling the pdfinfo command on a PDF file.


# Load streamlet-common.sh from the directory where this script really resides
# (realpath resolves symbolic links, so a symlinked streamlet still finds it).
source "$(dirname "$(realpath "$0")")/streamlet-common.sh"

# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message:
# reads the streamlet options and declares one output attribute per requested pdfinfo field.
#
# Globals read:    optionNames, optionValues (parallel arrays), outputAttributeAliases
# Globals written: pdfFields (pdfinfo field names to be extracted for each file),
#                  pdfPrefix (prepended to the field name when no alias is set for the attribute)
# Options:         'attribute' (repeatable) = pdfinfo field name, 'prefix' = attribute name prefix;
#                  any other option is reported on STDERR and ignored.
# Sends:           one OUTPUT_ATTRIBUTE_METADATA (name, type) per field, then WAITING_FOR_INPUT_ATTRIBUTES.
#                  The type is "integer" for the field Pages and "string" for everything else.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# keep the loop counter and the type out of the global scope, so a counter named "i" in the caller is not overwritten
	local i type

	pdfFields=()
	pdfPrefix="" # start from a clean state, do not pick up a stale or inherited value

	for (( i=0; i<${#optionNames[@]}; i++)); do
		case "${optionNames[$i]}" in
			attribute) pdfFields+=("${optionValues[$i]}") ;;
			prefix)    pdfPrefix="${optionValues[$i]}" ;;
			*)         echo "Unsupported option: ${optionNames[$i]}" >&2 ;;
		esac
	done

	# "$pdfFields" would expand to the first element only, so count the elements instead;
	# the default is used only if no attribute was requested at all
	if (( ${#pdfFields[@]} == 0 )); then
		pdfFields=( "Pages" )
	fi

	for (( i=0; i<${#pdfFields[@]}; i++)); do
		if [[ "${pdfFields[$i]}" == "Pages" ]]; then type="integer"; else type="string"; fi
		# an alias (if set for this position) wins over the default name = prefix + field name
		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}"    "$type"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}

# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES message:
# runs pdfinfo on the current file and sends one OUTPUT_ATTRIBUTE for each field listed in pdfFields.
#
# Globals read: currentFile (the file to be examined), pdfFields (pdfinfo field names)
# Sends:        OUTPUT_ATTRIBUTE (value, isNull) per field, then WAITING_FOR_INPUT_ATTRIBUTES.
#               isNull is "true" if the file is a directory, pdfinfo failed
#               or the field is missing or empty in the pdfinfo output; otherwise "false".
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	# the loop counter is local, so a counter named "i" in the caller is not overwritten
	local pdfInfo="" pdfValid=1 value isNull line rest i

	# directories are never passed to pdfinfo; they yield just NULL attributes
	if [[ ! -d "$currentFile" ]]; then
		pdfInfo="$(pdfinfo -isodates "$currentFile")"
		pdfValid="$?"
	fi

	for (( i=0; i<${#pdfFields[@]}; i++)); do
		value=""

		# Take the lines starting with "<field>:" and strip the field name and the padding whitespace.
		# The field name is compared literally (quoted), so it may contain any characters.
		while IFS= read -r line; do
			if [[ "$line" == "${pdfFields[$i]}:"* ]]; then
				rest="${line#"${pdfFields[$i]}:"}"
				value+="${rest#"${rest%%[![:space:]]*}"}" # remove the leading whitespace
			fi
		done <<< "$pdfInfo"

		if [[ "$pdfValid" == "0" && -n "$value" ]]; then isNull="false";
		else                                             isNull="true";
		fi

		send OUTPUT_ATTRIBUTE "$value"    "$isNull"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}

# Start the streamlet. Neither function is defined in this file;
# presumably both come from the sourced streamlet-common.sh – TODO confirm.
# NOTE(review): processMessages looks like the loop that dispatches to the processMessage_* handlers above – verify.
initialize
processMessages