#!/bin/bash

# Relational pipes
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Provenance (from the Mercurial changeset this script was introduced in):
#   changeset: 225da9aa9c942954d1f53d73696ab53cd724bd55
#   parent:    4191af89968ad994d0c11c5a31e6d275f9698bbf
#   user:      František Kučera
#   date:      1578872741 -3600 (Mon Jan 13 00:45:41 2020 +0100)
#   summary:   streamlet examples: pdfinfo
#   file:      streamlet-examples/__relpipe_in_filesystem_script_pdfinfo

# This streamlet provides PDF metadata of given files. It calls the tool pdfinfo.
# With no options it returns just the number of pages (or 0 if the file is not a PDF).
# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
# An optional --option 'prefix' '…' is prepended to the default attribute names.
# The list of available attributes can be obtained by calling the pdfinfo command
# directly on a PDF file.

# NOTE(review): streamlet-common.sh is not visible here; it presumably provides
# send, initialize and processMessages and fills optionNames, optionValues,
# outputAttributeAliases and currentFile – confirm against that file.
. "$(dirname "$0")/streamlet-common.sh"

# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message.
# Globals:
#   optionNames, optionValues (read)  parallel arrays of option names/values
#   outputAttributeAliases    (read)  per-index alias overriding the default name
#   pdfFields                 (write) pdfinfo keys to report; defaults to "Pages"
#   pdfPrefix                 (write) prefix for default attribute names
# Outputs:
#   one "send OUTPUT_ATTRIBUTE_METADATA <name> <type>" per field
#   ("integer" for Pages, "string" otherwise), then
#   "send WAITING_FOR_INPUT_ATTRIBUTES"; unsupported options are reported on stderr.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# The counter is local so that it cannot clobber a counter of the caller.
	local i attributeType

	pdfFields=()
	pdfPrefix=""	# explicit reset: do not inherit a stray value from the environment

	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		case "${optionNames[$i]}" in
			attribute) pdfFields+=("${optionValues[$i]}") ;;
			prefix)    pdfPrefix="${optionValues[$i]}" ;;
			*)         echo "Unsupported option: ${optionNames[$i]}" >&2 ;;
		esac
	done

	# Test the element count; "$pdfFields" alone would look only at element 0.
	if (( ${#pdfFields[@]} == 0 )); then
		pdfFields=("Pages")
	fi

	for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
		if [[ "${pdfFields[$i]}" == "Pages" ]]; then
			attributeType="integer"
		else
			attributeType="string"
		fi
		# An alias set at the same index wins over prefix + field name.
		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$attributeType"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}

# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES message for one file.
# Globals:
#   currentFile (read) path of the file to inspect
#   pdfFields   (read) pdfinfo keys selected by the metadata handler
# Outputs:
#   one "send OUTPUT_ATTRIBUTE <value> <isNull>" per field, then
#   "send WAITING_FOR_INPUT_ATTRIBUTES".
#   - directory, or pdfinfo failed:   Pages → "0"/true, other fields → ""/true
#   - pdfinfo succeeded:              value of the key / false
#                                     (a missing Pages key still yields "0"/true,
#                                      a missing other key yields ""/false)
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	local i field line value isNull
	local pdfInfo="" pdfValid="false"

	# Directories are never handed to pdfinfo and count as "not a PDF".
	# (Taking $? of a "[[ -d ]] || …" list would report success for them.)
	if [[ ! -d "$currentFile" ]] && pdfInfo="$(pdfinfo -isodates "$currentFile")"; then
		pdfValid="true"
	fi

	for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
		field="${pdfFields[$i]}"
		value=""

		# Find the first "<field>:" line. The quoted parts are matched literally,
		# so the field name may contain any characters.
		while IFS= read -r line; do
			if [[ "$line" == "$field:"* ]]; then
				value="${line#"$field:"}"
				value="${value#"${value%%[![:space:]]*}"}"	# strip the padding after the colon
				break
			fi
		done <<< "$pdfInfo"

		if [[ "$field" == "Pages" ]] && [[ "$pdfValid" != "true" || -z "$value" ]]; then
			value="0"
			isNull="true"
		elif [[ "$pdfValid" != "true" ]]; then
			value=""
			isNull="true"
		else
			isNull="false"
		fi

		send OUTPUT_ATTRIBUTE "$value" "$isNull"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}

initialize
processMessages