|
1 #!/bin/bash |
|
2 |
|
3 # Relational pipes |
|
4 # Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
|
5 # |
|
6 # This program is free software: you can redistribute it and/or modify |
|
7 # it under the terms of the GNU General Public License as published by |
|
8 # the Free Software Foundation, version 3 of the License. |
|
9 # |
|
10 # This program is distributed in the hope that it will be useful, |
|
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 # GNU General Public License for more details. |
|
14 # |
|
15 # You should have received a copy of the GNU General Public License |
|
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
17 |
|
18 |
|
# This streamlet provides PDF metadata of the given files. It calls the pdfinfo tool.
# With no options it returns just the number of pages (or 0 if the file is not a PDF).
# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
# The option 'prefix' sets a string that is prepended to the default names of the output attributes.
# The list of available attributes can be obtained by calling the pdfinfo command directly on a PDF file.
|
23 |
|
24 |
|
# Load the shared streamlet helpers from the directory this script lives in.
# "$0" is quoted inside the command substitution so that a script path
# containing spaces or glob characters is passed to dirname as a single word.
# NOTE(review): send, initialize and processMessages are not defined in this
# file; presumably streamlet-common.sh provides them – confirm.
. "$(dirname "$0")/streamlet-common.sh"
|
26 |
|
# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message: evaluates the
# streamlet options and announces one output attribute per requested pdfinfo field.
#
# Options (read from the parallel arrays optionNames / optionValues):
#   attribute  name of a pdfinfo field to return; may be given several times
#   prefix     string prepended to the default output attribute names
#   anything else is reported on stderr and otherwise ignored
#
# Globals read:    optionNames, optionValues, outputAttributeAliases
#                  (not set in this file – presumably filled in by streamlet-common.sh)
# Globals written: pdfFields (requested field names; ("Pages") when none was requested),
#                  pdfPrefix (only touched when a "prefix" option is present)
# Output:          one "send OUTPUT_ATTRIBUTE_METADATA <name> <type>" per field,
#                  then "send WAITING_FOR_INPUT_ATTRIBUTES"
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# Keep the loop counter and the type out of the global scope.
	local i attributeType

	pdfFields=()

	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		case "${optionNames[$i]}" in
			attribute) pdfFields+=("${optionValues[$i]}") ;;
			prefix)    pdfPrefix="${optionValues[$i]}" ;;
			*)         echo "Unsupported option: ${optionNames[$i]}" >&2 ;;
		esac
	done

	# Check the number of elements: "$pdfFields" alone expands to element 0 only,
	# so an empty first attribute would wrongly discard all requested attributes.
	if (( ${#pdfFields[@]} == 0 )); then
		pdfFields=("Pages")
	fi

	for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
		# Only the page count is numeric; every other field is passed as text.
		if [[ "${pdfFields[$i]}" == "Pages" ]]; then
			attributeType="integer"
		else
			attributeType="string"
		fi

		# An alias that is set for this position (even an empty one) wins;
		# otherwise the name is the optional prefix followed by the field name.
		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$attributeType"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}
|
51 |
|
# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES message: runs pdfinfo on the current
# file and sends one value per field listed in pdfFields.
#
# Globals read: currentFile (path of the file being processed – not set in this
#               file, presumably provided by streamlet-common.sh),
#               pdfFields (array of pdfinfo field names, e.g. "Pages", "Author")
# Output:       one "send OUTPUT_ATTRIBUTE <value> <isNull>" per field, then
#               "send WAITING_FOR_INPUT_ATTRIBUTES"
#
# Value rules:
#   - "Pages" that cannot be determined (pdfinfo failed, directory, field missing)
#     is sent as "0" with isNull=true
#   - any other field of a file that pdfinfo could not read is sent as "" with isNull=true
#   - otherwise the text after "<field>:" (leading whitespace removed) with isNull=false;
#     a field missing from valid pdfinfo output yields "" with isNull=false
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	local pdfInfo pdfValid value isNull
	local i field line

	# pdfinfo is never run on directories; they are treated like any other
	# non-PDF input. The status is set explicitly here – taking "$?" after
	# "[[ -d ... ]] || pdfinfo ..." would report success for a directory.
	if [[ -d "$currentFile" ]]; then
		pdfInfo=""
		pdfValid=1
	else
		pdfInfo="$(pdfinfo -isodates "$currentFile")"
		pdfValid=$?
	fi

	for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
		field="${pdfFields[$i]}"
		value=""

		# Pick the lines that start with "<field>:" and keep what follows the colon.
		# The field name is matched literally (quoted pattern), so it may contain any
		# characters. Values of several matching lines are concatenated.
		while IFS= read -r line; do
			if [[ "$line" == "$field:"* ]]; then
				line="${line#"$field:"}"
				# strip the padding between the colon and the value
				value+="${line#"${line%%[![:space:]]*}"}"
			fi
		done <<< "$pdfInfo"

		if [[ "$field" == "Pages" ]] && { (( pdfValid != 0 )) || [[ -z "$value" ]]; }; then
			value="0"
			isNull="true"
		elif (( pdfValid != 0 )); then
			value=""
			isNull="true"
		else
			isNull="false"
		fi

		send OUTPUT_ATTRIBUTE "$value" "$isNull"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}
|
71 |
|
# Script entry point: first set the streamlet up, then enter message processing.
# NOTE(review): neither function is defined in this file; presumably both come
# from streamlet-common.sh and processMessages invokes the processMessage_*
# handlers defined in this script – confirm against that file.
initialize
processMessages