streamlet-examples/pdfinfo
branchv_0
changeset 45 f466b4c7d9b1
parent 39 225da9aa9c94
child 49 ab48ad4ecb91
equal deleted inserted replaced
44:dc5c210295d0 45:f466b4c7d9b1
       
     1 #!/bin/bash
       
     2 
       
     3 # Relational pipes
       
     4 # Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
       
     5 #
       
     6 # This program is free software: you can redistribute it and/or modify
       
     7 # it under the terms of the GNU General Public License as published by
       
     8 # the Free Software Foundation, version 3 of the License.
       
     9 #
       
    10 # This program is distributed in the hope that it will be useful,
       
    11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
       
    13 # GNU General Public License for more details.
       
    14 #
       
    15 # You should have received a copy of the GNU General Public License
       
    16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
       
    17 
       
    18 
       
    19 # This streamlet provides PDF metadata of given files. It calls the tool pdfinfo.
       
     20 # With no options it returns just the number of pages (or 0 if the file is not a PDF).
       
    21 # Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
       
     22 # The list of available attributes can be obtained by calling the pdfinfo command directly on a PDF file.
       
    23 
       
    24 
       
    25 . "$(dirname $0)/streamlet-common.sh"
       
    26 
       
    27 processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
       
    28 	pdfFields=()
       
    29 
       
    30 	for (( i=0; i<${#optionNames[@]}; i++)); do
       
    31 		if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
       
    32 			pdfFields+=("${optionValues[$i]}");
       
    33 		elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then
       
    34 			pdfPrefix="${optionValues[$i]}";
       
    35 		else
       
    36 			echo "Unsupported option: ${optionNames[$i]}" >&2
       
    37 		fi
       
    38 	done
       
    39 
       
    40 	if [[ -z "$pdfFields" ]]; then
       
    41 		pdfFields=( "Pages" );
       
    42 	fi
       
    43 
       
    44 	for (( i=0; i<${#pdfFields[@]}; i++)); do
       
    45 		if [[ "x${pdfFields[$i]}" == "xPages" ]]; then local type="integer"; else local type="string"; fi
       
    46 		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}"    "$type"
       
    47 	done
       
    48 
       
    49 	send WAITING_FOR_INPUT_ATTRIBUTES
       
    50 }
       
    51 
       
    52 processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
       
    53 	local pdfInfo pdfValid value isNull;
       
    54 
       
    55 	[[ -d "$currentFile" ]] || pdfInfo="$(pdfinfo -isodates "$currentFile")";
       
    56 	pdfValid="$?";
       
    57 
       
    58 	for (( i=0; i<${#pdfFields[@]}; i++)); do
       
    59 		value="$(echo "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E"
       
    60 
       
    61 		if  ([[ ! "x$pdfValid" == "x0" ]] || [[ "x$value" == "x" ]]) && [[ "x${pdfFields[$i]}" == "xPages" ]]; then value="0"; isNull="true";
       
    62 		elif [[ ! "x$pdfValid" == "x0" ]];                                                                     then value="";  isNull="true";
       
    63 		else                                                                                                                   isNull="false";
       
    64 		fi
       
    65 
       
    66 		send OUTPUT_ATTRIBUTE "$value"    "$isNull";
       
    67 	done
       
    68 	
       
    69 	send WAITING_FOR_INPUT_ATTRIBUTES;
       
    70 }
       
    71 
       
    72 initialize
       
    73 processMessages