streamlet-examples/pdfinfo
author František Kučera <franta-hg@frantovo.cz>
Sat, 06 Jun 2020 01:50:42 +0200
branch v_0
changeset 80 fd7b3e510dd9
parent 70 018e2609f5bb
permissions -rwxr-xr-x
Added tag v0.16 for changeset 288a7f075777
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
33
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
#!/bin/bash
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
# Relational pipes
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
#
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
# This program is free software: you can redistribute it and/or modify
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
# it under the terms of the GNU General Public License as published by
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
# the Free Software Foundation, version 3 of the License.
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
#
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
# This program is distributed in the hope that it will be useful,
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
# GNU General Public License for more details.
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
#
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
# You should have received a copy of the GNU General Public License
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
# along with this program. If not, see <http://www.gnu.org/licenses/>.
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
39
225da9aa9c94 streamlet examples: pdfinfo
František Kučera <franta-hg@frantovo.cz>
parents: 37
diff changeset
    19
# This streamlet provides PDF metadata of given files. It calls the tool pdfinfo.
225da9aa9c94 streamlet examples: pdfinfo
František Kučera <franta-hg@frantovo.cz>
parents: 37
diff changeset
    20
# With no options, it returns just the number of pages (or 0 if the file is not a PDF).
225da9aa9c94 streamlet examples: pdfinfo
František Kučera <franta-hg@frantovo.cz>
parents: 37
diff changeset
    21
# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
225da9aa9c94 streamlet examples: pdfinfo
František Kučera <franta-hg@frantovo.cz>
parents: 37
diff changeset
    22
# The list of available attributes can be obtained by running the pdfinfo command directly on a PDF file.
33
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
50
22ed5647b235 streamlets: include streamlet-common.sh from the directory where the actual streamlet resides (not where is the symlink, if any)
František Kučera <franta-hg@frantovo.cz>
parents: 49
diff changeset
    25
# Source the shared streamlet helpers from the directory of the real script file:
# realpath resolves "$0" first, so the lookup also works when this streamlet is started through a symlink.
. "$(dirname "$(realpath "$0")")/streamlet-common.sh"
33
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message.
#
# Reads the streamlet options from the parallel arrays optionNames/optionValues:
#   attribute – name of a pdfinfo field to return (may be given several times)
#   prefix    – text put in front of the field name to build the default attribute name
# Any other option is reported on STDERR and otherwise ignored.
#
# Then announces one output attribute per selected field (type "integer" for "Pages",
# "string" for everything else) and finally sends WAITING_FOR_INPUT_ATTRIBUTES.
# An explicitly set alias from outputAttributeAliases takes precedence over the default name.
#
# Sets the globals pdfFields and pdfPrefix, which are kept global on purpose:
# pdfFields is read again when the attribute values are produced for each file.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# Keep the loop index and the type local so they do not leak into (or clobber) the caller's scope.
	local i type

	pdfFields=()

	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		case "${optionNames[$i]}" in
			attribute) pdfFields+=("${optionValues[$i]}") ;;
			prefix)    pdfPrefix="${optionValues[$i]}" ;;
			*)         echo "Unsupported option: ${optionNames[$i]}" >&2 ;;
		esac
	done

	# Test the number of elements, not "$pdfFields": the latter expands to the first element only,
	# so an empty first attribute would wrongly discard all the other requested attributes.
	if (( ${#pdfFields[@]} == 0 )); then
		pdfFields=( "Pages" )
	fi

	for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
		if [[ "${pdfFields[$i]}" == "Pages" ]]; then
			type="integer"
		else
			type="string"
		fi
		# "-" (not ":-"): fall back to the default name only if no alias is set at this index.
		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$type"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    51
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    52
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES message.
#
# Runs "pdfinfo -isodates" on $currentFile (directories are skipped) and sends one
# OUTPUT_ATTRIBUTE message per entry of the global pdfFields array, in the same order.
# Each message carries the field value and an isNull flag; the flag is "true" when
# pdfinfo was not run or failed, or when the field is missing or has an empty value.
# Finally sends WAITING_FOR_INPUT_ATTRIBUTES.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	local pdfInfo pdfValid value isNull
	local i pdfField pdfLine pdfRest

	if [[ -d "$currentFile" ]]; then
		# pdfinfo is not called for directories: mark the result as invalid explicitly
		# instead of relying on the exit status of the directory test.
		pdfValid=1
	else
		pdfInfo="$(pdfinfo -isodates "$currentFile")"
		pdfValid="$?"
	fi

	for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
		pdfField="${pdfFields[$i]}"
		value=""

		# Literal (non-regex) match of "<field>:" at the beginning of a line,
		# so the field name may contain any characters and no PCRE-capable grep is needed.
		while IFS= read -r pdfLine; do
			[[ "$pdfLine" == "$pdfField:"* ]] || continue
			pdfRest="${pdfLine#"$pdfField:"}"
			# Drop the padding between the colon and the value.
			pdfRest="${pdfRest#"${pdfRest%%[![:space:]]*}"}"
			value+="$pdfRest"
		done <<< "$pdfInfo"

		if [[ "$pdfValid" != "0" || -z "$value" ]]; then
			isNull="true"
		else
			isNull="false"
		fi

		send OUTPUT_ATTRIBUTE "$value" "$isNull"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    70
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    71
# NOTE(review): initialize is not defined in this file; presumably it comes from the sourced streamlet-common.sh — confirm there.
initialize
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    72
# NOTE(review): processMessages is not defined in this file; presumably it is the loop in streamlet-common.sh
# that calls the processMessage_* handlers defined above — confirm there.
processMessages