# HG changeset patch # User František Kučera # Date 1579374574 -3600 # Node ID f466b4c7d9b1d638d1cad12af21c742dd68bdc22 # Parent dc5c210295d096ca8c8e7a7b2ba76a30d21c4067 streamlets: use $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH variable instead of __relpipe_in_filesystem_script_ prefix diff -r dc5c210295d0 -r f466b4c7d9b1 bash-completion.sh --- a/bash-completion.sh Sat Jan 18 16:41:59 2020 +0100 +++ b/bash-completion.sh Sat Jan 18 20:09:34 2020 +0100 @@ -13,10 +13,6 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -_relpipe_in_filesystem_scripts() { - while read c; do echo ${c:31}; done < <(compgen -c "__relpipe_in_filesystem_script_"); -} - _relpipe_in_filesystem_completion() { local w0 w1 w2 @@ -60,7 +56,7 @@ elif [[ "$w2" == "--option" && "x$w0" == "x" ]]; then COMPREPLY=("''") elif [[ "$w1" == "--file" ]]; then COMPREPLY=($(compgen -W "${FILE_FIELDS[*]}" -- "$w0")) elif [[ "$w1" == "--xattr" ]]; then COMPREPLY=($(compgen -W "${XATTR_FIELDS[*]}" -- "$w0")) - elif [[ "$w1" == "--streamlet" ]]; then COMPREPLY=($(compgen -W "$(_relpipe_in_filesystem_scripts)" -- "$w0")) + elif [[ "$w1" == "--streamlet" ]]; then COMPREPLY=($(while read c; do PATH="$RELPIPE_IN_FILESYSTEM_STREAMLET_PATH" type -P "$c" &>/dev/null && echo "$c"; done < <(PATH="$RELPIPE_IN_FILESYSTEM_STREAMLET_PATH" compgen -A command -- "$w0"))) else OPTIONS=( "--relation" diff -r dc5c210295d0 -r f466b4c7d9b1 src/StreamletAttributeFinder.h --- a/src/StreamletAttributeFinder.h Sat Jan 18 16:41:59 2020 +0100 +++ b/src/StreamletAttributeFinder.h Sat Jan 18 20:09:34 2020 +0100 @@ -43,11 +43,15 @@ std::map> subProcesses; std::map> cachedMetadata; - string_t getExecCommand(const RequestedField& field) { - // TODO: move to another directory, exec, not script + use custom $PATH with no prefix - return SCRIPT_PREFIX + field.name; + string_t getStreamletPath() { + const char* originalPath = getenv("PATH"); + const char* streamletPath = 
getenv("RELPIPE_IN_FILESYSTEM_STREAMLET_PATH"); + + if (originalPath && streamletPath) return convertor.from_bytes(std::string(streamletPath) + ":" + originalPath); + else if (originalPath) return convertor.from_bytes(std::string(originalPath)); + else if (streamletPath) return convertor.from_bytes(std::string(streamletPath)); + else return L""; } - protected: void startFile(const fs::path& file, const string& fileRaw, bool exists) override { @@ -86,7 +90,7 @@ return cachedMetadata[field.id]; } else { - std::vector commandLine = {getExecCommand(field)}; + std::vector commandLine = {field.name}; std::map environment; for (auto mn : StreamletMsg::getMessageNames()) { @@ -94,6 +98,8 @@ environment[L"EXEC_MSG_" + std::to_wstring(mn.first)] = mn.second; } + environment[L"PATH"] = getStreamletPath(); + shared_ptr subProcess(SubProcess::create(commandLine, environment)); subProcesses[field.id] = subProcess; diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_cloc --- a/streamlet-examples/__relpipe_in_filesystem_script_cloc Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet counts lines of code of given files. It calls the tool cloc. 
-# -# With no options, these attributes are provided: language, code, comment, blank -# Specific attributes can be selected using options – e.g. --option 'attribute' 'code' -# or --option "attribute" "total" (sum of code, comment and blank lines, hidden by default). -# -# Optional prefix can be added to attribute names: --option 'prefix' 'my_prefix_' - - -. "$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - clocFields=() - - for (( i=0; i<${#optionNames[@]}; i++)); do - if [[ "x${optionNames[$i]}" == "xattribute" ]]; then - if [[ "${optionValues[$i]}" =~ ^(language|blank|comment|code)$ ]]; then - clocFields+=("${optionValues[$i]}"); - else - echo "Unsupported attribute: ${optionValues[$i]}" >&2 - fi - elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then - clocPrefix="${optionValues[$i]}"; - else - echo "Unsupported option: ${optionNames[$i]}" >&2 - fi - done - - if [[ -z "$clocFields" ]]; then - clocFields=( "language" "code" "comment" "blank" ); # + "total" - fi - - for (( i=0; i<${#clocFields[@]}; i++)); do - if [[ "x${clocFields[$i]}" == "xlanguage" ]]; then local type="string"; else local type="integer"; fi - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$clocPrefix${clocFields[$i]}}" "$type" - done - - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - local language files blank comment code total; - - [[ -d "$currentFile" ]] || read_nullbyte language files blank comment code total < <( cloc "$currentFile" | perl -ne 'if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) { print "$1\0$2\0$3\0$4\0$5\0"; print $3 + $4 + $5; print "\0"; }' ); - - for (( i=0; i<${#clocFields[@]}; i++)); do - value="${!clocFields[$i]}"; - - if [[ "x$files" == "x1" ]]; then isNull="false"; - elif [[ "x${clocFields[$i]}" == "xlanguage" ]]; then value=""; isNull="true"; - else value="0"; isNull="true"; fi - - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - done - - send 
WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_exiftool --- a/streamlet-examples/__relpipe_in_filesystem_script_exiftool Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides various file metadata like EXIF or PDF. It calls the tool exiftool. -# With no options it returns "File:MIMEType" and "exiftool_xml" attributes. -# Specific attributes can be selected using options – e.g. --option 'attribute' '…' -# List of available attributes can be obtained by directly calling the "exiftool -X" command on given file or from the "available_attributes" attribute. -# Two additional attributes are provided by this streamlet: -# - "exiftool_xml" – all attributes provided by exiftool in form of XML -# - "available_attributes" – list of available attributes (each file may have different) separated by line-breaks (TODO: return as an array of strings, when this data type is implemented) - - -. 
"$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - streamletFields=() - - for (( i=0; i<${#optionNames[@]}; i++)); do - if [[ "x${optionNames[$i]}" == "xattribute" ]]; then - streamletFields+=("${optionValues[$i]}"); - elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then - pdfPrefix="${optionValues[$i]}"; - else - echo "Unsupported option: ${optionNames[$i]}" >&2 - fi - done - - if [[ -z "$streamletFields" ]]; then - streamletFields=( "File:MIMEType" "exiftool_xml" ); - fi - - for (( i=0; i<${#streamletFields[@]}; i++)); do - # TODO: data type mappings (integers, booleans) - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "string" - done - - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - local streamletInfo streamletValid value isNull; - - [[ -d "$currentFile" ]] || streamletInfo="$(exiftool -X "$currentFile")"; - streamletValid="$?"; - - for (( i=0; i<${#streamletFields[@]}; i++)); do - if [[ "x${streamletFields[$i]}" == "xexiftool_xml" ]]; then value="$streamletInfo"; - elif [[ "x${streamletFields[$i]}" == "xavailable_attributes" ]]; then - value=$'available_attributes\nexiftool_xml\n'"$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records '/*/*/*' --attribute 'name' string 'name()' | relpipe-out-nullbyte | tr \\0 \\n)"; - else - value="$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records "/*/*/*[name() = '${streamletFields[$i]}']" --attribute 'value' string '.' | relpipe-out-nullbyte | tr -d \\0)"; - # TODO: parse the XML only once - # TODO: validate parameter or use parametrized XPath - # TODO: use real namespaces - fi - - # n.b. for some files exiftools returns exit code, however it provides some basic properties like file timestamps and Unknown file type which is also valid XML and might be useful - if [[ ! 
"x$streamletValid" == "x0" ]] && [[ "x$value" == "x" ]]; then value=""; isNull="true"; - else isNull="false"; - fi - - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - done - - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_exiv2 --- a/streamlet-examples/__relpipe_in_filesystem_script_exiv2 Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides EXIF metadata. It calls the tool exiv2. -# With no options it returns "Image size", "Copyright" and "Exif comment" attributes. -# Specific attributes can be selected using options – e.g. --option 'attribute' 'Image size' -# List of available attributes can be obtained by directly calling the exiv2 command on a image file. -# Two additional attributes are provided by this streamlet: "Image height" and "Image width" (they are extracted from "Image size"). - - -. 
"$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - streamletFields=() - - for (( i=0; i<${#optionNames[@]}; i++)); do - if [[ "x${optionNames[$i]}" == "xattribute" ]]; then - streamletFields+=("${optionValues[$i]}"); - elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then - pdfPrefix="${optionValues[$i]}"; - else - echo "Unsupported option: ${optionNames[$i]}" >&2 - fi - done - - if [[ -z "$streamletFields" ]]; then - streamletFields=( "Image size" "Copyright" "Exif comment" ); - fi - - for (( i=0; i<${#streamletFields[@]}; i++)); do - if [[ "x${streamletFields[$i]}" == "xImage height" ]] ||[[ "x${streamletFields[$i]}" == "xImage width" ]]; then local type="integer"; else local type="string"; fi - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "$type" - done - - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - local streamletInfo streamletValid value isNull; - - [[ -d "$currentFile" ]] || streamletInfo="$(exiv2 "$currentFile")"; - streamletValid="$?"; - - for (( i=0; i<${#streamletFields[@]}; i++)); do - value="$(echo "$streamletInfo" | grep -P "^\Q${streamletFields[$i]}\E\s*:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E" - - if [[ -z "$value" ]] && [[ "x${streamletFields[$i]}" == "xImage width" ]]; then value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\1/g')"; - elif [[ -z "$value" ]] && [[ "x${streamletFields[$i]}" == "xImage height" ]]; then value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\2/g')"; - fi - - # n.b. if file has no exif data, exiv2 exits with error „No Exif data found in the file“ and thus $streamletValid != 0, but there still might be some value like „Image size“ - if [[ ! 
"x$streamletValid" == "x0" ]] && [[ "x$value" == "x" ]]; then value=""; isNull="true"; - else isNull="false"; - fi - - if ( [[ "x${streamletFields[$i]}" == "xImage height" ]] || [[ "x${streamletFields[$i]}" == "xImage width" ]] ) && [[ ! "$value" =~ ^[0-9]+$ ]]; then value="0"; isNull="true"; fi - - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - done - - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_hash --- a/streamlet-examples/__relpipe_in_filesystem_script_hash Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet computes hashes of given files. -# Default algorithm is sha256. -# -# Any supported hash algorithm can be specified by e.g. --option "attribute" "sha512" -# The hash command is derived from the algorithm name by adding "sum" suffix and must be available at $PATH. -# -# Multiple algorithms can be specified (just repeat the --option). -# But single streamlet instance will run them sequentially. 
-# When parallell processing is needed (usually faster) then multiple scriptlet instances should be used: -# --scriptlet hash --option "sha1" --as "sha1" --scriptlet hash --option "sha256" --as "sha256" -# instead of: -# --scriptlet hash --option "sha1" --as "sha1" --option "sha256" --as "sha256" - - -. "$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - hashTypes=() - hashCommands=() - - for (( i=0; i<${#optionNames[@]}; i++)); do - if [[ "x${optionNames[$i]}" == "xattribute" ]]; then - if type "${optionValues[$i]}sum" > /dev/null; then - hashTypes+=("${optionValues[$i]}"); - hashCommands+=("${optionValues[$i]}sum"); - else - echo "Unsupported attribute: ${optionValues[$i]}" >&2 - fi - else - echo "Unsupported option: ${optionNames[$i]}" >&2 - fi - done - - if [[ -z "$hashTypes" ]]; then - hashTypes=("sha256") - hashCommands=("sha256sum") - fi - - for (( i=0; i<${#hashTypes[@]}; i++)); do - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-${hashTypes[$i]}}" "string" - done - - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - for (( i=0; i<${#hashTypes[@]}; i++)); do - value=$("${hashCommands[$i]}" "$currentFile" | cut -d" " -f1) 2>/dev/null; - if [[ -z "$value" ]]; then isNull="true"; else isNull="false"; fi - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - done - - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_inode --- a/streamlet-examples/__relpipe_in_filesystem_script_inode Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of 
the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides a single attribute: inode number of given file - - -. "$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-inode}" "integer" - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - value=$(ls -d -i "$currentFile" | cut -d" " -f1); - send OUTPUT_ATTRIBUTE "$value" "false"; - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_lines_count --- a/streamlet-examples/__relpipe_in_filesystem_script_lines_count Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides a single attribute: number of lines of given file -# Standard wc -l is used to count the lines. 
-# Directories are reported a 0 lines and with a null flag (will be supported in further Relational pipes versions). - - -. "$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-lines_count}" "integer" - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - if [[ -d "$currentFile" ]]; then - value="0"; - isNull="true"; - else - value=$(wc -l "$currentFile" | cut -d" " -f1); - isNull="false"; - fi - - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_mime_type --- a/streamlet-examples/__relpipe_in_filesystem_script_mime_type Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides a single attribute: MIME type of given file. -# It calls the tool file. - - -. 
"$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-mime_type}" "string" - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - value=$(file --preserve-date --brief --mime-type --dereference "$currentFile"); - send OUTPUT_ATTRIBUTE "$value" "false"; - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_pdfinfo --- a/streamlet-examples/__relpipe_in_filesystem_script_pdfinfo Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides PDF metadata of given files. It calls the tool pdfinfo. -# With no options it returns just number of pages (or 0 if the file is not a PDF). -# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author' -# List of available attributes can be obtained by directly calling the pdfinfo command on a PDF file. - - -. 
"$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - pdfFields=() - - for (( i=0; i<${#optionNames[@]}; i++)); do - if [[ "x${optionNames[$i]}" == "xattribute" ]]; then - pdfFields+=("${optionValues[$i]}"); - elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then - pdfPrefix="${optionValues[$i]}"; - else - echo "Unsupported option: ${optionNames[$i]}" >&2 - fi - done - - if [[ -z "$pdfFields" ]]; then - pdfFields=( "Pages" ); - fi - - for (( i=0; i<${#pdfFields[@]}; i++)); do - if [[ "x${pdfFields[$i]}" == "xPages" ]]; then local type="integer"; else local type="string"; fi - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$type" - done - - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - local pdfInfo pdfValid value isNull; - - [[ -d "$currentFile" ]] || pdfInfo="$(pdfinfo -isodates "$currentFile")"; - pdfValid="$?"; - - for (( i=0; i<${#pdfFields[@]}; i++)); do - value="$(echo "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E" - - if ([[ ! "x$pdfValid" == "x0" ]] || [[ "x$value" == "x" ]]) && [[ "x${pdfFields[$i]}" == "xPages" ]]; then value="0"; isNull="true"; - elif [[ ! 
"x$pdfValid" == "x0" ]]; then value=""; isNull="true"; - else isNull="false"; - fi - - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - done - - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_pdftotext --- a/streamlet-examples/__relpipe_in_filesystem_script_pdftotext Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides a single attribute: plain text content of given PDF file. It calls the tool pdftotext. -# n.b. the plain text content must fit into memory and shell variable and command-line argument (it usually will) - - -. "$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-pdftotext}" "string" - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - value="$(pdftotext "$currentFile" - | tr -d \\f)"; # tr just removes page breaks - if [[ "x$?" 
== "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/__relpipe_in_filesystem_script_tesseract --- a/streamlet-examples/__relpipe_in_filesystem_script_tesseract Sat Jan 18 16:41:59 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -#!/bin/bash - -# Relational pipes -# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This streamlet provides a single attribute: OCR recognized texf of given image file. It calls the tool tesseract. -# Languages can be specified by: --option "language" "eng" --option "language" "ces" - - -. "$(dirname $0)/streamlet-common.sh" - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { - tesseractLanguage=""; - for (( i=0; i<${#optionNames[@]}; i++)); do - if [[ "x${optionNames[$i]}" == "xlanguage" ]]; then - tesseractLanguage+="+${optionValues[$i]}"; - else - echo "Unsupported option: ${optionNames[$i]}" >&2 - fi - done - - send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string" - send WAITING_FOR_INPUT_ATTRIBUTES -} - -processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { - value="$(cat "$currentFile" | tesseract stdin stdout "${tesseractLanguage:+-l}" "${tesseractLanguage}")"; - if [[ "x$?" 
== "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi - value="$(echo "$value" | tr -d \\f)" - send OUTPUT_ATTRIBUTE "$value" "$isNull"; - send WAITING_FOR_INPUT_ATTRIBUTES; -} - -initialize -processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/cloc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/cloc Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,78 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet counts lines of code of given files. It calls the tool cloc. +# +# With no options, these attributes are provided: language, code, comment, blank +# Specific attributes can be selected using options – e.g. --option 'attribute' 'code' +# or --option "attribute" "total" (sum of code, comment and blank lines, hidden by default). +# +# Optional prefix can be added to attribute names: --option 'prefix' 'my_prefix_' + + +. 
"$(dirname $0)/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + clocFields=() + + for (( i=0; i<${#optionNames[@]}; i++)); do + if [[ "x${optionNames[$i]}" == "xattribute" ]]; then + if [[ "${optionValues[$i]}" =~ ^(language|blank|comment|code)$ ]]; then + clocFields+=("${optionValues[$i]}"); + else + echo "Unsupported attribute: ${optionValues[$i]}" >&2 + fi + elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then + clocPrefix="${optionValues[$i]}"; + else + echo "Unsupported option: ${optionNames[$i]}" >&2 + fi + done + + if [[ -z "$clocFields" ]]; then + clocFields=( "language" "code" "comment" "blank" ); # + "total" + fi + + for (( i=0; i<${#clocFields[@]}; i++)); do + if [[ "x${clocFields[$i]}" == "xlanguage" ]]; then local type="string"; else local type="integer"; fi + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$clocPrefix${clocFields[$i]}}" "$type" + done + + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + local language files blank comment code total; + + [[ -d "$currentFile" ]] || read_nullbyte language files blank comment code total < <( cloc "$currentFile" | perl -ne 'if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) { print "$1\0$2\0$3\0$4\0$5\0"; print $3 + $4 + $5; print "\0"; }' ); + + for (( i=0; i<${#clocFields[@]}; i++)); do + value="${!clocFields[$i]}"; + + if [[ "x$files" == "x1" ]]; then isNull="false"; + elif [[ "x${clocFields[$i]}" == "xlanguage" ]]; then value=""; isNull="true"; + else value="0"; isNull="true"; fi + + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + done + + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/exiftool --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/exiftool Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,84 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free 
software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides various file metadata like EXIF or PDF. It calls the tool exiftool. +# With no options it returns "File:MIMEType" and "exiftool_xml" attributes. +# Specific attributes can be selected using options – e.g. --option 'attribute' '…' +# List of available attributes can be obtained by directly calling the "exiftool -X" command on given file or from the "available_attributes" attribute. +# Two additional attributes are provided by this streamlet: +# - "exiftool_xml" – all attributes provided by exiftool in form of XML +# - "available_attributes" – list of available attributes (each file may have different) separated by line-breaks (TODO: return as an array of strings, when this data type is implemented) + + +. 
"$(dirname $0)/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + streamletFields=() + + for (( i=0; i<${#optionNames[@]}; i++)); do + if [[ "x${optionNames[$i]}" == "xattribute" ]]; then + streamletFields+=("${optionValues[$i]}"); + elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then + pdfPrefix="${optionValues[$i]}"; # NOTE(review): holds the attribute name prefix; the name pdfPrefix looks like a leftover from the pdfinfo streamlet + else + echo "Unsupported option: ${optionNames[$i]}" >&2 + fi + done + + if [[ -z "$streamletFields" ]]; then + streamletFields=( "File:MIMEType" "exiftool_xml" ); + fi + + for (( i=0; i<${#streamletFields[@]}; i++)); do + # TODO: data type mappings (integers, booleans) + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "string" + done + + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + local streamletInfo streamletValid value isNull; + + [[ -d "$currentFile" ]] || streamletInfo="$(exiftool -X "$currentFile")"; + streamletValid="$?"; # exit code of exiftool; NOTE(review): for directories this is the status of the [[ -d ]] test, i.e. 0 + + for (( i=0; i<${#streamletFields[@]}; i++)); do + if [[ "x${streamletFields[$i]}" == "xexiftool_xml" ]]; then value="$streamletInfo"; + elif [[ "x${streamletFields[$i]}" == "xavailable_attributes" ]]; then + value=$'available_attributes\nexiftool_xml\n'"$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records '/*/*/*' --attribute 'name' string 'name()' | relpipe-out-nullbyte | tr \\0 \\n)"; + else + value="$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records "/*/*/*[name() = '${streamletFields[$i]}']" --attribute 'value' string '.' | relpipe-out-nullbyte | tr -d \\0)"; + # TODO: parse the XML only once + # TODO: validate parameter or use parametrized XPath + # TODO: use real namespaces + fi + + # n.b. for some files exiftool returns a non-zero exit code, however it still provides some basic properties like file timestamps and Unknown file type which is also valid XML and might be useful + if [[ ! 
"x$streamletValid" == "x0" ]] && [[ "x$value" == "x" ]]; then value=""; isNull="true"; + else isNull="false"; + fi + + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + done + + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/exiv2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/exiv2 Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,80 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides EXIF metadata. It calls the tool exiv2. +# With no options it returns "Image size", "Copyright" and "Exif comment" attributes. +# Specific attributes can be selected using options – e.g. --option 'attribute' 'Image size' +# List of available attributes can be obtained by directly calling the exiv2 command on an image file. +# Two additional attributes are provided by this streamlet: "Image height" and "Image width" (they are extracted from "Image size"). + + +. 
"$(dirname $0)/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + streamletFields=() + + for (( i=0; i<${#optionNames[@]}; i++)); do + if [[ "x${optionNames[$i]}" == "xattribute" ]]; then + streamletFields+=("${optionValues[$i]}"); + elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then + pdfPrefix="${optionValues[$i]}"; # NOTE(review): holds the attribute name prefix; the name pdfPrefix looks like a leftover from the pdfinfo streamlet + else + echo "Unsupported option: ${optionNames[$i]}" >&2 + fi + done + + if [[ -z "$streamletFields" ]]; then + streamletFields=( "Image size" "Copyright" "Exif comment" ); + fi + + for (( i=0; i<${#streamletFields[@]}; i++)); do + if [[ "x${streamletFields[$i]}" == "xImage height" ]] ||[[ "x${streamletFields[$i]}" == "xImage width" ]]; then local type="integer"; else local type="string"; fi + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "$type" + done + + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + local streamletInfo streamletValid value isNull; + + [[ -d "$currentFile" ]] || streamletInfo="$(exiv2 "$currentFile")"; + streamletValid="$?"; # exit code of exiv2; NOTE(review): for directories this is the status of the [[ -d ]] test, i.e. 0 + + for (( i=0; i<${#streamletFields[@]}; i++)); do + value="$(echo "$streamletInfo" | grep -P "^\Q${streamletFields[$i]}\E\s*:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E" + + if [[ -z "$value" ]] && [[ "x${streamletFields[$i]}" == "xImage width" ]]; then value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\1/g')"; # derived from "Image size": the first number + elif [[ -z "$value" ]] && [[ "x${streamletFields[$i]}" == "xImage height" ]]; then value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\2/g')"; # derived from "Image size": the second number + fi + + # n.b. if file has no exif data, exiv2 exits with error „No Exif data found in the file“ and thus $streamletValid != 0, but there still might be some value like „Image size“ + if [[ ! 
"x$streamletValid" == "x0" ]] && [[ "x$value" == "x" ]]; then value=""; isNull="true"; + else isNull="false"; + fi + + if ( [[ "x${streamletFields[$i]}" == "xImage height" ]] || [[ "x${streamletFields[$i]}" == "xImage width" ]] ) && [[ ! "$value" =~ ^[0-9]+$ ]]; then value="0"; isNull="true"; fi # the integer attributes must be numeric: anything else is sent as 0 with the null flag + + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + done + + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/hash --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/hash Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,75 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet computes hashes of given files. +# Default algorithm is sha256. +# +# Any supported hash algorithm can be specified by e.g. --option "attribute" "sha512" +# The hash command is derived from the algorithm name by adding the "sum" suffix and must be available on $PATH. +# +# Multiple algorithms can be specified (just repeat the --option). +# But a single streamlet instance will run them sequentially. 
+# When parallel processing is needed (usually faster) then multiple streamlet instances should be used: +# --streamlet hash --option "attribute" "sha1" --as "sha1" --streamlet hash --option "attribute" "sha256" --as "sha256" +# instead of: +# --streamlet hash --option "attribute" "sha1" --as "sha1" --option "attribute" "sha256" --as "sha256" + + +. "$(dirname "$0")/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + hashTypes=() + hashCommands=() + + for (( i=0; i<${#optionNames[@]}; i++)); do + if [[ "x${optionNames[$i]}" == "xattribute" ]]; then + if type "${optionValues[$i]}sum" > /dev/null; then # the algorithm is supported only if the command <algorithm>sum can be found + hashTypes+=("${optionValues[$i]}"); + hashCommands+=("${optionValues[$i]}sum"); + else + echo "Unsupported attribute: ${optionValues[$i]}" >&2 + fi + else + echo "Unsupported option: ${optionNames[$i]}" >&2 + fi + done + + if [[ -z "$hashTypes" ]]; then + hashTypes=("sha256") + hashCommands=("sha256sum") + fi + + for (( i=0; i<${#hashTypes[@]}; i++)); do + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-${hashTypes[$i]}}" "string" + done + + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + for (( i=0; i<${#hashTypes[@]}; i++)); do + value=$("${hashCommands[$i]}" "$currentFile" 2>/dev/null | cut -d" " -f1); # the hash is the first field of the output; the redirection must be inside the substitution to silence the hash command, its failure (empty output) leads to NULL + if [[ -z "$value" ]]; then isNull="true"; else isNull="false"; fi + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + done + + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/inode --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/inode Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,36 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides a single attribute: inode number of given file + + +. "$(dirname $0)/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-inode}" "integer" + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + value=$(ls -d -i "$currentFile" | cut -d" " -f1); # -d: list a directory itself, not its content; -i: print the inode number, which is the first space-separated field + send OUTPUT_ATTRIBUTE "$value" "false"; + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/lines_count --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/lines_count Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,45 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides a single attribute: number of lines of given file +# Standard wc -l is used to count the lines. +# Directories are reported as 0 lines and with a null flag (will be supported in further Relational pipes versions). + + +. 
"$(dirname $0)/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-lines_count}" "integer" + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + if [[ -d "$currentFile" ]]; then + value="0"; # directories have no line count: 0 is sent together with the null flag + isNull="true"; + else + value=$(wc -l "$currentFile" | cut -d" " -f1); # wc prints the count followed by the file name; keep only the count + isNull="false"; + fi + + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/mime_type --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/mime_type Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,37 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides a single attribute: MIME type of given file. +# It calls the tool file. + + +. 
"$(dirname $0)/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-mime_type}" "string" + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + value=$(file --preserve-date --brief --mime-type --dereference "$currentFile"); # --brief: no file name in the output; --dereference: follow symlinks; --preserve-date: try to keep the access time of the inspected file + send OUTPUT_ATTRIBUTE "$value" "false"; + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/pdfinfo --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/pdfinfo Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,73 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides PDF metadata of given files. It calls the tool pdfinfo. +# With no options it returns just number of pages (or 0 if the file is not a PDF). +# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author' +# List of available attributes can be obtained by directly calling the pdfinfo command on a PDF file. + + +. 
"$(dirname "$0")/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + pdfFields=() + + for (( i=0; i<${#optionNames[@]}; i++)); do + if [[ "x${optionNames[$i]}" == "xattribute" ]]; then + pdfFields+=("${optionValues[$i]}"); + elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then + pdfPrefix="${optionValues[$i]}"; + else + echo "Unsupported option: ${optionNames[$i]}" >&2 + fi + done + + if [[ -z "$pdfFields" ]]; then + pdfFields=( "Pages" ); + fi + + for (( i=0; i<${#pdfFields[@]}; i++)); do + if [[ "x${pdfFields[$i]}" == "xPages" ]]; then local type="integer"; else local type="string"; fi + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$type" + done + + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + local pdfInfo pdfValid value isNull; + + [[ -d "$currentFile" ]] || pdfInfo="$(pdfinfo -isodates "$currentFile")"; + pdfValid="$?"; # exit code of pdfinfo; for directories this is the status of the [[ -d ]] test, i.e. 0 + + for (( i=0; i<${#pdfFields[@]}; i++)); do + value="$(echo "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E" + + if ([[ ! "x$pdfValid" == "x0" ]] || [[ "x$value" == "x" ]]) && [[ "x${pdfFields[$i]}" == "xPages" ]]; then value="0"; isNull="true"; + elif [[ ! 
"x$pdfValid" == "x0" ]]; then value=""; isNull="true"; + else isNull="false"; + fi + + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + done + + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/pdftotext --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/pdftotext Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,38 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides a single attribute: plain text content of given PDF file. It calls the tool pdftotext. +# n.b. the plain text content must fit into memory and shell variable and command-line argument (it usually will) + + +. "$(dirname "$0")/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-pdftotext}" "string" + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + value="$(set -o pipefail; pdftotext "$currentFile" - | tr -d \\f)"; # tr just removes page breaks; pipefail (set only inside the substitution) makes $? report a pdftotext failure instead of the exit status of tr + if [[ "x$?" 
== "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages diff -r dc5c210295d0 -r f466b4c7d9b1 streamlet-examples/tesseract --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/streamlet-examples/tesseract Sat Jan 18 20:09:34 2020 +0100 @@ -0,0 +1,48 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This streamlet provides a single attribute: OCR recognized text of given image file. It calls the tool tesseract. +# Languages can be specified by: --option "language" "eng" --option "language" "ces" + + +. "$(dirname "$0")/streamlet-common.sh" + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { + tesseractLanguage=""; + for (( i=0; i<${#optionNames[@]}; i++)); do + if [[ "x${optionNames[$i]}" == "xlanguage" ]]; then + tesseractLanguage+="+${optionValues[$i]}"; + else + echo "Unsupported option: ${optionNames[$i]}" >&2 + fi + done + + send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string" + send WAITING_FOR_INPUT_ATTRIBUTES +} + +processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { + value="$(cat "$currentFile" | tesseract stdin stdout ${tesseractLanguage:+-l "${tesseractLanguage#+}"})"; # -l is passed only when some language was requested (no empty arguments otherwise); the leading "+" left by the option loop is stripped + if [[ "x$?" 
== "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi + value="$(echo "$value" | tr -d \\f)" + send OUTPUT_ATTRIBUTE "$value" "$isNull"; + send WAITING_FOR_INPUT_ATTRIBUTES; +} + +initialize +processMessages