streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
#!/bin/bash
# Relational pipes
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This streamlet provides PDF metadata of the given files. It calls the pdfinfo tool.
# With no options it returns just the number of pages (or 0 if the file is not a PDF).
# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
# A common prefix for the default output attribute names can be set using --option 'prefix' '…'
# The list of available attributes can be obtained by calling the pdfinfo command directly on a PDF file.
. "$(dirname "$(realpath "$0")")/streamlet-common.sh"
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message:
# reads the streamlet options and announces one output attribute per requested PDF field.
#
# Globals read:
#   optionNames, optionValues – parallel arrays of option names and values
#                               (not set in this file; presumably filled by streamlet-common.sh – TODO confirm)
#   outputAttributeAliases    – optional per-attribute names; if element i is set, it is used as the attribute name
# Globals written:
#   pdfFields – names of the pdfinfo fields to be returned (defaults to "Pages" if no 'attribute' option was given)
#   pdfPrefix – value of the 'prefix' option; prepended to the field name when there is no alias
# Outputs:
#   one OUTPUT_ATTRIBUTE_METADATA message per field (type "integer" for Pages, "string" otherwise),
#   followed by WAITING_FOR_INPUT_ATTRIBUTES; unsupported options are reported on stderr and skipped
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# the loop counter is local so that it cannot clobber a variable "i" of the caller
	local i attributeType

	pdfFields=()
	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		case "${optionNames[$i]}" in
			attribute) pdfFields+=("${optionValues[$i]}") ;;
			prefix)    pdfPrefix="${optionValues[$i]}" ;;
			*)         echo "Unsupported option: ${optionNames[$i]}" >&2 ;;
		esac
	done

	# Test the number of elements, not "$pdfFields": that expands to the first element only,
	# so an empty first attribute would throw away all the other requested attributes.
	if (( ${#pdfFields[@]} == 0 )); then
		pdfFields=( "Pages" )
	fi

	for (( i = 0; i < ${#pdfFields[@]}; i++ )); do
		if [[ "${pdfFields[$i]}" == "Pages" ]]; then
			attributeType="integer"
		else
			attributeType="string"
		fi
		# "-" (not ":-"): an alias that is set but empty is still used as it is
		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$attributeType"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES message:
# runs pdfinfo on the current file and sends one value per requested field.
#
# Globals read:
#   currentFile – path of the file to inspect (not set in this file; presumably provided by streamlet-common.sh – TODO confirm)
#   pdfFields   – names of the pdfinfo fields to return, in output order
# Outputs:
#   one OUTPUT_ATTRIBUTE message per field with arguments: value, isNull ("true"/"false"),
#   followed by WAITING_FOR_INPUT_ATTRIBUTES.
#   isNull is "true" if the file is a directory, if pdfinfo failed, or if the field is missing or empty.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	local pdfInfo="" pdfValid=1 field line value isNull

	# pdfinfo is not called for directories; they are reported as not valid, so all values are NULL
	if [[ ! -d "$currentFile" ]]; then
		# assigned separately from "local" so that $? is the exit status of pdfinfo
		pdfInfo="$(pdfinfo -isodates "$currentFile")"
		pdfValid="$?"
	fi

	for field in "${pdfFields[@]}"; do
		value=""
		# The field name is compared as a literal prefix (no regular expression),
		# so it may contain any characters, including backslash sequences like "\E".
		while IFS= read -r line; do
			if [[ "$line" == "$field:"* ]]; then
				line="${line#"$field:"}"                 # drop the "Name:" key
				value+="${line#"${line%%[![:space:]]*}"}" # drop the padding between key and value
			fi
		done <<< "$pdfInfo"

		if [[ "$pdfValid" == "0" && -n "$value" ]]; then
			isNull="false"
		else
			isNull="true"
		fi
		send OUTPUT_ATTRIBUTE "$value" "$isNull"
	done

	send WAITING_FOR_INPUT_ATTRIBUTES
}
# Entry point of the streamlet. Neither function is defined in this file;
# presumably both come from the sourced streamlet-common.sh (TODO confirm).
initialize
# NOTE(review): presumably runs the message loop that dispatches to the processMessage_* handlers defined above – verify against streamlet-common.sh
processMessages