streamlets: use $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH variable instead of __relpipe_in_filesystem_script_ prefix
--- a/bash-completion.sh Sat Jan 18 16:41:59 2020 +0100
+++ b/bash-completion.sh Sat Jan 18 20:09:34 2020 +0100
@@ -13,10 +13,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-_relpipe_in_filesystem_scripts() {
- while read c; do echo ${c:31}; done < <(compgen -c "__relpipe_in_filesystem_script_");
-}
-
_relpipe_in_filesystem_completion() {
local w0 w1 w2
@@ -60,7 +56,7 @@
elif [[ "$w2" == "--option" && "x$w0" == "x" ]]; then COMPREPLY=("''")
elif [[ "$w1" == "--file" ]]; then COMPREPLY=($(compgen -W "${FILE_FIELDS[*]}" -- "$w0"))
elif [[ "$w1" == "--xattr" ]]; then COMPREPLY=($(compgen -W "${XATTR_FIELDS[*]}" -- "$w0"))
- elif [[ "$w1" == "--streamlet" ]]; then COMPREPLY=($(compgen -W "$(_relpipe_in_filesystem_scripts)" -- "$w0"))
+ elif [[ "$w1" == "--streamlet" ]]; then COMPREPLY=($(while read c; do PATH="$RELPIPE_IN_FILESYSTEM_STREAMLET_PATH" type -P "$c" &>/dev/null && echo "$c"; done < <(PATH="$RELPIPE_IN_FILESYSTEM_STREAMLET_PATH" compgen -A command -- "$w0")))
else
OPTIONS=(
"--relation"
--- a/src/StreamletAttributeFinder.h Sat Jan 18 16:41:59 2020 +0100
+++ b/src/StreamletAttributeFinder.h Sat Jan 18 20:09:34 2020 +0100
@@ -43,11 +43,15 @@
std::map<int, std::shared_ptr<SubProcess>> subProcesses;
std::map<int, std::vector<AttributeMetadata>> cachedMetadata;
- string_t getExecCommand(const RequestedField& field) {
- // TODO: move to another directory, exec, not script + use custom $PATH with no prefix
- return SCRIPT_PREFIX + field.name;
+ // Search path for streamlet sub-processes: $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH first, then the inherited $PATH. Unset and empty variables are both skipped, so the concatenation never introduces an empty entry (which PATH lookup treats as the current directory).
+ string_t getStreamletPath() {
+ const char* originalPath = getenv("PATH");
+ const char* streamletPath = getenv("RELPIPE_IN_FILESYSTEM_STREAMLET_PATH");
+ const bool hasOriginal = originalPath && *originalPath;
+ const bool hasStreamlet = streamletPath && *streamletPath;
+ if (hasOriginal && hasStreamlet) return convertor.from_bytes(std::string(streamletPath) + ":" + originalPath);
+ return convertor.from_bytes(std::string(hasStreamlet ? streamletPath : hasOriginal ? originalPath : ""));
}
-
protected:
void startFile(const fs::path& file, const string& fileRaw, bool exists) override {
@@ -86,7 +90,7 @@
return cachedMetadata[field.id];
} else {
- std::vector<string_t> commandLine = {getExecCommand(field)};
+ std::vector<string_t> commandLine = {field.name};
std::map<string_t, string_t> environment;
for (auto mn : StreamletMsg::getMessageNames()) {
@@ -94,6 +98,8 @@
environment[L"EXEC_MSG_" + std::to_wstring(mn.first)] = mn.second;
}
+ environment[L"PATH"] = getStreamletPath();
+
shared_ptr<SubProcess> subProcess(SubProcess::create(commandLine, environment));
subProcesses[field.id] = subProcess;
--- a/streamlet-examples/__relpipe_in_filesystem_script_cloc Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet counts lines of code of given files. It calls the tool cloc.
-#
-# With no options, these attributes are provided: language, code, comment, blank
-# Specific attributes can be selected using options – e.g. --option 'attribute' 'code'
-# or --option "attribute" "total" (sum of code, comment and blank lines, hidden by default).
-#
-# Optional prefix can be added to attribute names: --option 'prefix' 'my_prefix_'
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- clocFields=()
-
- for (( i=0; i<${#optionNames[@]}; i++)); do
- if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
- if [[ "${optionValues[$i]}" =~ ^(language|blank|comment|code)$ ]]; then
- clocFields+=("${optionValues[$i]}");
- else
- echo "Unsupported attribute: ${optionValues[$i]}" >&2
- fi
- elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then
- clocPrefix="${optionValues[$i]}";
- else
- echo "Unsupported option: ${optionNames[$i]}" >&2
- fi
- done
-
- if [[ -z "$clocFields" ]]; then
- clocFields=( "language" "code" "comment" "blank" ); # + "total"
- fi
-
- for (( i=0; i<${#clocFields[@]}; i++)); do
- if [[ "x${clocFields[$i]}" == "xlanguage" ]]; then local type="string"; else local type="integer"; fi
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$clocPrefix${clocFields[$i]}}" "$type"
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- local language files blank comment code total;
-
- [[ -d "$currentFile" ]] || read_nullbyte language files blank comment code total < <( cloc "$currentFile" | perl -ne 'if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) { print "$1\0$2\0$3\0$4\0$5\0"; print $3 + $4 + $5; print "\0"; }' );
-
- for (( i=0; i<${#clocFields[@]}; i++)); do
- value="${!clocFields[$i]}";
-
- if [[ "x$files" == "x1" ]]; then isNull="false";
- elif [[ "x${clocFields[$i]}" == "xlanguage" ]]; then value=""; isNull="true";
- else value="0"; isNull="true"; fi
-
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_exiftool Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides various file metadata like EXIF or PDF. It calls the tool exiftool.
-# With no options it returns "File:MIMEType" and "exiftool_xml" attributes.
-# Specific attributes can be selected using options – e.g. --option 'attribute' '…'
-# List of available attributes can be obtained by directly calling the "exiftool -X" command on given file or from the "available_attributes" attribute.
-# Two additional attributes are provided by this streamlet:
-# - "exiftool_xml" – all attributes provided by exiftool in form of XML
-# - "available_attributes" – list of available attributes (each file may have different) separated by line-breaks (TODO: return as an array of strings, when this data type is implemented)
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- streamletFields=()
-
- for (( i=0; i<${#optionNames[@]}; i++)); do
- if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
- streamletFields+=("${optionValues[$i]}");
- elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then
- pdfPrefix="${optionValues[$i]}";
- else
- echo "Unsupported option: ${optionNames[$i]}" >&2
- fi
- done
-
- if [[ -z "$streamletFields" ]]; then
- streamletFields=( "File:MIMEType" "exiftool_xml" );
- fi
-
- for (( i=0; i<${#streamletFields[@]}; i++)); do
- # TODO: data type mappings (integers, booleans)
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "string"
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- local streamletInfo streamletValid value isNull;
-
- [[ -d "$currentFile" ]] || streamletInfo="$(exiftool -X "$currentFile")";
- streamletValid="$?";
-
- for (( i=0; i<${#streamletFields[@]}; i++)); do
- if [[ "x${streamletFields[$i]}" == "xexiftool_xml" ]]; then value="$streamletInfo";
- elif [[ "x${streamletFields[$i]}" == "xavailable_attributes" ]]; then
- value=$'available_attributes\nexiftool_xml\n'"$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records '/*/*/*' --attribute 'name' string 'name()' | relpipe-out-nullbyte | tr \\0 \\n)";
- else
- value="$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records "/*/*/*[name() = '${streamletFields[$i]}']" --attribute 'value' string '.' | relpipe-out-nullbyte | tr -d \\0)";
- # TODO: parse the XML only once
- # TODO: validate parameter or use parametrized XPath
- # TODO: use real namespaces
- fi
-
- # n.b. for some files exiftools returns exit code, however it provides some basic properties like file timestamps and <ExifTool:Error>Unknown file type</ExifTool:Error> which is also valid XML and might be useful
- if [[ ! "x$streamletValid" == "x0" ]] && [[ "x$value" == "x" ]]; then value=""; isNull="true";
- else isNull="false";
- fi
-
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_exiv2 Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides EXIF metadata. It calls the tool exiv2.
-# With no options it returns "Image size", "Copyright" and "Exif comment" attributes.
-# Specific attributes can be selected using options – e.g. --option 'attribute' 'Image size'
-# List of available attributes can be obtained by directly calling the exiv2 command on a image file.
-# Two additional attributes are provided by this streamlet: "Image height" and "Image width" (they are extracted from "Image size").
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- streamletFields=()
-
- for (( i=0; i<${#optionNames[@]}; i++)); do
- if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
- streamletFields+=("${optionValues[$i]}");
- elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then
- pdfPrefix="${optionValues[$i]}";
- else
- echo "Unsupported option: ${optionNames[$i]}" >&2
- fi
- done
-
- if [[ -z "$streamletFields" ]]; then
- streamletFields=( "Image size" "Copyright" "Exif comment" );
- fi
-
- for (( i=0; i<${#streamletFields[@]}; i++)); do
- if [[ "x${streamletFields[$i]}" == "xImage height" ]] ||[[ "x${streamletFields[$i]}" == "xImage width" ]]; then local type="integer"; else local type="string"; fi
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "$type"
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- local streamletInfo streamletValid value isNull;
-
- [[ -d "$currentFile" ]] || streamletInfo="$(exiv2 "$currentFile")";
- streamletValid="$?";
-
- for (( i=0; i<${#streamletFields[@]}; i++)); do
- value="$(echo "$streamletInfo" | grep -P "^\Q${streamletFields[$i]}\E\s*:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E"
-
- if [[ -z "$value" ]] && [[ "x${streamletFields[$i]}" == "xImage width" ]]; then value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\1/g')";
- elif [[ -z "$value" ]] && [[ "x${streamletFields[$i]}" == "xImage height" ]]; then value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\2/g')";
- fi
-
- # n.b. if file has no exif data, exiv2 exits with error „No Exif data found in the file“ and thus $streamletValid != 0, but there still might be some value like „Image size“
- if [[ ! "x$streamletValid" == "x0" ]] && [[ "x$value" == "x" ]]; then value=""; isNull="true";
- else isNull="false";
- fi
-
- if ( [[ "x${streamletFields[$i]}" == "xImage height" ]] || [[ "x${streamletFields[$i]}" == "xImage width" ]] ) && [[ ! "$value" =~ ^[0-9]+$ ]]; then value="0"; isNull="true"; fi
-
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_hash Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet computes hashes of given files.
-# Default algorithm is sha256.
-#
-# Any supported hash algorithm can be specified by e.g. --option "attribute" "sha512"
-# The hash command is derived from the algorithm name by adding "sum" suffix and must be available at $PATH.
-#
-# Multiple algorithms can be specified (just repeat the --option).
-# But single streamlet instance will run them sequentially.
-# When parallell processing is needed (usually faster) then multiple scriptlet instances should be used:
-# --scriptlet hash --option "sha1" --as "sha1" --scriptlet hash --option "sha256" --as "sha256"
-# instead of:
-# --scriptlet hash --option "sha1" --as "sha1" --option "sha256" --as "sha256"
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- hashTypes=()
- hashCommands=()
-
- for (( i=0; i<${#optionNames[@]}; i++)); do
- if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
- if type "${optionValues[$i]}sum" > /dev/null; then
- hashTypes+=("${optionValues[$i]}");
- hashCommands+=("${optionValues[$i]}sum");
- else
- echo "Unsupported attribute: ${optionValues[$i]}" >&2
- fi
- else
- echo "Unsupported option: ${optionNames[$i]}" >&2
- fi
- done
-
- if [[ -z "$hashTypes" ]]; then
- hashTypes=("sha256")
- hashCommands=("sha256sum")
- fi
-
- for (( i=0; i<${#hashTypes[@]}; i++)); do
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-${hashTypes[$i]}}" "string"
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- for (( i=0; i<${#hashTypes[@]}; i++)); do
- value=$("${hashCommands[$i]}" "$currentFile" | cut -d" " -f1) 2>/dev/null;
- if [[ -z "$value" ]]; then isNull="true"; else isNull="false"; fi
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_inode Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides a single attribute: inode number of given file
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-inode}" "integer"
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- value=$(ls -d -i "$currentFile" | cut -d" " -f1);
- send OUTPUT_ATTRIBUTE "$value" "false";
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_lines_count Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides a single attribute: number of lines of given file
-# Standard wc -l is used to count the lines.
-# Directories are reported a 0 lines and with a null flag (will be supported in further Relational pipes versions).
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-lines_count}" "integer"
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- if [[ -d "$currentFile" ]]; then
- value="0";
- isNull="true";
- else
- value=$(wc -l "$currentFile" | cut -d" " -f1);
- isNull="false";
- fi
-
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_mime_type Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides a single attribute: MIME type of given file.
-# It calls the tool file.
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-mime_type}" "string"
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- value=$(file --preserve-date --brief --mime-type --dereference "$currentFile");
- send OUTPUT_ATTRIBUTE "$value" "false";
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_pdfinfo Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides PDF metadata of given files. It calls the tool pdfinfo.
-# With no options it returns just number of pages (or 0 if the file is not a PDF).
-# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
-# List of available attributes can be obtained by directly calling the pdfinfo command on a PDF file.
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- pdfFields=()
-
- for (( i=0; i<${#optionNames[@]}; i++)); do
- if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
- pdfFields+=("${optionValues[$i]}");
- elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then
- pdfPrefix="${optionValues[$i]}";
- else
- echo "Unsupported option: ${optionNames[$i]}" >&2
- fi
- done
-
- if [[ -z "$pdfFields" ]]; then
- pdfFields=( "Pages" );
- fi
-
- for (( i=0; i<${#pdfFields[@]}; i++)); do
- if [[ "x${pdfFields[$i]}" == "xPages" ]]; then local type="integer"; else local type="string"; fi
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$type"
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- local pdfInfo pdfValid value isNull;
-
- [[ -d "$currentFile" ]] || pdfInfo="$(pdfinfo -isodates "$currentFile")";
- pdfValid="$?";
-
- for (( i=0; i<${#pdfFields[@]}; i++)); do
- value="$(echo "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E"
-
- if ([[ ! "x$pdfValid" == "x0" ]] || [[ "x$value" == "x" ]]) && [[ "x${pdfFields[$i]}" == "xPages" ]]; then value="0"; isNull="true";
- elif [[ ! "x$pdfValid" == "x0" ]]; then value=""; isNull="true";
- else isNull="false";
- fi
-
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- done
-
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_pdftotext Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides a single attribute: plain text content of given PDF file. It calls the tool pdftotext.
-# n.b. the plain text content must fit into memory and shell variable and command-line argument (it usually will)
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-pdftotext}" "string"
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- value="$(pdftotext "$currentFile" - | tr -d \\f)"; # tr just removes page breaks
- if [[ "x$?" == "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- a/streamlet-examples/__relpipe_in_filesystem_script_tesseract Sat Jan 18 16:41:59 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# Relational pipes
-# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-# This streamlet provides a single attribute: OCR recognized texf of given image file. It calls the tool tesseract.
-# Languages can be specified by: --option "language" "eng" --option "language" "ces"
-
-
-. "$(dirname $0)/streamlet-common.sh"
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
- tesseractLanguage="";
- for (( i=0; i<${#optionNames[@]}; i++)); do
- if [[ "x${optionNames[$i]}" == "xlanguage" ]]; then
- tesseractLanguage+="+${optionValues[$i]}";
- else
- echo "Unsupported option: ${optionNames[$i]}" >&2
- fi
- done
-
- send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string"
- send WAITING_FOR_INPUT_ATTRIBUTES
-}
-
-processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
- value="$(cat "$currentFile" | tesseract stdin stdout "${tesseractLanguage:+-l}" "${tesseractLanguage}")";
- if [[ "x$?" == "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi
- value="$(echo "$value" | tr -d \\f)"
- send OUTPUT_ATTRIBUTE "$value" "$isNull";
- send WAITING_FOR_INPUT_ATTRIBUTES;
-}
-
-initialize
-processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/cloc Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet counts lines of code of given files. It calls the tool cloc.
+#
+# With no options, these attributes are provided: language, code, comment, blank
+# Specific attributes can be selected using options – e.g. --option 'attribute' 'code'
+# or --option "attribute" "total" (sum of code, comment and blank lines, hidden by default).
+#
+# Optional prefix can be added to attribute names: --option 'prefix' 'my_prefix_'
+
+
+. "$(dirname $0)/streamlet-common.sh"
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+ # Reads the streamlet options (attribute = field to output, prefix = attribute name prefix), then announces one output attribute per selected field: "language" as string, everything else as integer.
+ clocFields=()
+ for (( i=0; i<${#optionNames[@]}; i++)); do
+ if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
+ if [[ "${optionValues[$i]}" =~ ^(language|blank|comment|code|total)$ ]]; then # "total" is advertised in the header and filled in by the value handler
+ clocFields+=("${optionValues[$i]}");
+ else
+ echo "Unsupported attribute: ${optionValues[$i]}" >&2
+ fi
+ elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then
+ clocPrefix="${optionValues[$i]}";
+ else
+ echo "Unsupported option: ${optionNames[$i]}" >&2
+ fi
+ done
+
+ if [[ -z "$clocFields" ]]; then
+ clocFields=( "language" "code" "comment" "blank" ); # + "total"
+ fi
+
+ for (( i=0; i<${#clocFields[@]}; i++)); do
+ if [[ "x${clocFields[$i]}" == "xlanguage" ]]; then local type="string"; else local type="integer"; fi
+ send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$clocPrefix${clocFields[$i]}}" "$type"
+ done
+
+ send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+ local language files blank comment code total;
+ # Directories are skipped. Otherwise the perl filter turns every cloc output line that contains four whitespace-separated integers into NUL-terminated fields: name, files, blank, comment, code and the sum blank + comment + code; read_nullbyte (defined outside this file) presumably assigns them to the listed variables in order.
+ [[ -d "$currentFile" ]] || read_nullbyte language files blank comment code total < <( cloc "$currentFile" | perl -ne 'if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) { print "$1\0$2\0$3\0$4\0$5\0"; print $3 + $4 + $5; print "\0"; }' );
+ # Emit one OUTPUT_ATTRIBUTE per entry of clocFields.
+ for (( i=0; i<${#clocFields[@]}; i++)); do
+ value="${!clocFields[$i]}"; # indirect expansion: reads the variable whose name is the field name
+ # The value is reported as non-null only when cloc counted exactly one file; otherwise an empty string (language) or 0 (numeric fields) is sent with the null flag.
+ if [[ "x$files" == "x1" ]]; then isNull="false";
+ elif [[ "x${clocFields[$i]}" == "xlanguage" ]]; then value=""; isNull="true";
+ else value="0"; isNull="true"; fi
+
+ send OUTPUT_ATTRIBUTE "$value" "$isNull";
+ done
+
+ send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/exiftool Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides various file metadata like EXIF or PDF. It calls the tool exiftool.
+# With no options it returns "File:MIMEType" and "exiftool_xml" attributes.
+# Specific attributes can be selected using options – e.g. --option 'attribute' '…'
+# List of available attributes can be obtained by directly calling the "exiftool -X" command on given file or from the "available_attributes" attribute.
+# Two additional attributes are provided by this streamlet:
+# - "exiftool_xml" – all attributes provided by exiftool in form of XML
+# - "available_attributes" – list of available attributes (each file may have a different set) separated by line breaks (TODO: return as an array of strings when this data type is implemented)
+
+
+. "$(dirname $0)/streamlet-common.sh"
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+ streamletFields=()
+ # "attribute" options select the output fields, "prefix" sets a name prefix; any other option is reported on STDERR and otherwise ignored.
+ for (( i=0; i<${#optionNames[@]}; i++)); do
+ if [[ "x${optionNames[$i]}" == "xattribute" ]]; then
+ streamletFields+=("${optionValues[$i]}");
+ elif [[ "x${optionNames[$i]}" == "xprefix" ]]; then
+ pdfPrefix="${optionValues[$i]}"; # NOTE(review): variable name looks carried over from the pdfinfo streamlet
+ else
+ echo "Unsupported option: ${optionNames[$i]}" >&2
+ fi
+ done
+ # No explicit selection: fall back to the two default attributes.
+ if [[ -z "$streamletFields" ]]; then
+ streamletFields=( "File:MIMEType" "exiftool_xml" );
+ fi
+ # Announce every field as a string attribute; an alias set in outputAttributeAliases at the same index replaces the prefixed field name.
+ for (( i=0; i<${#streamletFields[@]}; i++)); do
+ # TODO: data type mappings (integers, booleans)
+ send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "string"
+ done
+
+ send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+ local streamletInfo streamletValid value isNull;
+ # exiftool runs once per file and directories are skipped; streamletValid holds $? of the whole "||" list, so it is also 0 when the file is a directory and exiftool was not run.
+ [[ -d "$currentFile" ]] || streamletInfo="$(exiftool -X "$currentFile")";
+ streamletValid="$?";
+ # Derive the value of each requested field from the XML captured above.
+ for (( i=0; i<${#streamletFields[@]}; i++)); do
+ if [[ "x${streamletFields[$i]}" == "xexiftool_xml" ]]; then value="$streamletInfo";
+ elif [[ "x${streamletFields[$i]}" == "xavailable_attributes" ]]; then
+ value=$'available_attributes\nexiftool_xml\n'"$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records '/*/*/*' --attribute 'name' string 'name()' | relpipe-out-nullbyte | tr \\0 \\n)";
+ else
+ value="$(echo "$streamletInfo" | relpipe-in-xmltable --relation exif --records "/*/*/*[name() = '${streamletFields[$i]}']" --attribute 'value' string '.' | relpipe-out-nullbyte | tr -d \\0)";
+ # TODO: parse the XML only once
+ # TODO: validate parameter or use parametrized XPath
+ # TODO: use real namespaces
+ fi
+
+ # n.b. for some files exiftool returns a non-zero exit code, however it still provides some basic properties like file timestamps and <ExifTool:Error>Unknown file type</ExifTool:Error> which is also valid XML and might be useful
+ if [[ ! "x$streamletValid" == "x0" ]] && [[ "x$value" == "x" ]]; then value=""; isNull="true";
+ else isNull="false";
+ fi
+
+ send OUTPUT_ATTRIBUTE "$value" "$isNull";
+ done
+
+ send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/exiv2 Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides EXIF metadata. It calls the tool exiv2.
+# With no options it returns "Image size", "Copyright" and "Exif comment" attributes.
+# Specific attributes can be selected using options – e.g. --option 'attribute' 'Image size'
+# List of available attributes can be obtained by directly calling the exiv2 command on an image file.
+# Two additional attributes are provided by this streamlet: "Image height" and "Image width" (they are extracted from "Image size").
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+	# Collects the requested exiv2 fields from the options and sends metadata of one output attribute per field.
+	streamletFields=()
+	for (( i=0; i<${#optionNames[@]}; i++)); do
+		case "${optionNames[$i]}" in
+			attribute) streamletFields+=("${optionValues[$i]}");;
+			prefix) pdfPrefix="${optionValues[$i]}";;
+			*) echo "Unsupported option: ${optionNames[$i]}" >&2;;
+		esac
+	done
+
+	# no usable attribute option (the first field is missing or empty) → fall back to the default set
+	[[ -n "$streamletFields" ]] || streamletFields=( "Image size" "Copyright" "Exif comment" );
+
+	for (( i=0; i<${#streamletFields[@]}; i++)); do
+		# only the two image dimensions are declared as integers; everything else is declared as a string
+		local type="string";
+		case "${streamletFields[$i]}" in
+			"Image height"|"Image width") type="integer";;
+		esac
+		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${streamletFields[$i]}}" "$type"
+	done
+	send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+	local streamletInfo streamletValid value isNull;
+	# exiv2 is executed once per file (directories are skipped); all fields are then picked from its text output
+	[[ -d "$currentFile" ]] || streamletInfo="$(exiv2 "$currentFile")";
+	streamletValid="$?";
+
+	for (( i=0; i<${#streamletFields[@]}; i++)); do
+		value="$(echo "$streamletInfo" | grep -P "^\Q${streamletFields[$i]}\E\s*:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E"
+		# fallback: when exiv2 printed no such field, "Image width" and "Image height" are cut out of the "Image size" line
+		if [[ -z "$value" ]]; then case "${streamletFields[$i]}" in
+			"Image width") value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\1/g')";;
+			"Image height") value="$(echo "$streamletInfo" | grep -E 'Image size\s*:\s*[0-9]+ x [0-9]+' | sed -E 's/Image size\s*:\s*([0-9]+) x ([0-9]+)/\2/g')";;
+		esac; fi
+		# n.b. if the file has no exif data, exiv2 exits with the error „No Exif data found in the file“ and thus $streamletValid != 0, but there still might be some value like „Image size“
+		if [[ "x$streamletValid" == "x0" || -n "$value" ]]; then isNull="false";
+		else value=""; isNull="true";
+		fi
+		# the two dimensions are declared as integers, so a non-numeric value is replaced by a null 0
+		case "${streamletFields[$i]}" in "Image height"|"Image width") [[ "$value" =~ ^[0-9]+$ ]] || { value="0"; isNull="true"; };; esac
+
+		send OUTPUT_ATTRIBUTE "$value" "$isNull";
+	done
+
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/hash Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet computes hashes of given files.
+# Default algorithm is sha256.
+#
+# Any supported hash algorithm can be specified by e.g. --option "attribute" "sha512"
+# The hash command is derived from the algorithm name by adding "sum" suffix and must be available at $PATH.
+#
+# Multiple algorithms can be specified (just repeat the --option).
+# But single streamlet instance will run them sequentially.
+# When parallel processing is needed (usually faster) then multiple streamlet instances should be used:
+# --streamlet hash --option "attribute" "sha1" --as "sha1" --streamlet hash --option "attribute" "sha256" --as "sha256"
+# instead of:
+# --streamlet hash --option "attribute" "sha1" --as "sha1" --option "attribute" "sha256" --as "sha256"
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+	# Builds two parallel arrays (algorithm name, command name) from the "attribute" options
+	# and sends metadata of one string attribute per accepted algorithm.
+	hashTypes=(); hashCommands=()
+
+	for (( i=0; i<${#optionNames[@]}; i++)); do
+		if [[ "x${optionNames[$i]}" != "xattribute" ]]; then
+			echo "Unsupported option: ${optionNames[$i]}" >&2
+			continue
+		fi
+		# an algorithm is accepted only if the shell can resolve a command called "<algorithm>sum"
+		if ! type "${optionValues[$i]}sum" > /dev/null; then
+			echo "Unsupported attribute: ${optionValues[$i]}" >&2
+			continue
+		fi
+		hashTypes+=("${optionValues[$i]}");
+		hashCommands+=("${optionValues[$i]}sum");
+	done
+
+	# nothing accepted → default algorithm
+	[[ -n "$hashTypes" ]] || { hashTypes=("sha256"); hashCommands=("sha256sum"); }
+
+	for (( i=0; i<${#hashTypes[@]}; i++)); do
+		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-${hashTypes[$i]}}" "string"
+	done
+
+	send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+	# Sends one hash of $currentFile per configured algorithm; an empty result (e.g. the command failed on a directory) is sent as null.
+	for (( i=0; i<${#hashTypes[@]}; i++)); do
+		value=$("${hashCommands[$i]}" "$currentFile" 2>/dev/null | cut -d" " -f1); # the redirection must be inside the substitution: attached to the assignment it does not silence the hash command in bash
+		if [[ -z "$value" ]]; then isNull="true"; else isNull="false"; fi
+		send OUTPUT_ATTRIBUTE "$value" "$isNull";
+	done
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/inode Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides a single attribute: inode number of given file
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { # sends the metadata of the single attribute of this streamlet
+ send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-inode}" "integer" # name: the first alias if it is set (even if empty), otherwise "inode"; type: "integer"
+ send WAITING_FOR_INPUT_ATTRIBUTES # sent after the metadata of the only attribute
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { # sends the inode number of $currentFile (first column of ls -d -i), never flagged as null
+	value=$(ls -d -i -- "$currentFile" | cut -d" " -f1); # "--": a file name starting with "-" must not be parsed as an ls option
+	send OUTPUT_ATTRIBUTE "$value" "false";
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/lines_count Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides a single attribute: number of lines of given file
+# Standard wc -l is used to count the lines.
+# Directories are reported as 0 lines and with a null flag (will be supported in further Relational pipes versions).
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { # sends the metadata of the single attribute of this streamlet
+ send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-lines_count}" "integer" # name: the first alias if it is set (even if empty), otherwise "lines_count"; type: "integer"
+ send WAITING_FOR_INPUT_ATTRIBUTES # sent after the metadata of the only attribute
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+	# Sends the number of lines of $currentFile. Directories and files that cannot be counted (e.g. unreadable ones)
+	# are sent as 0 with the null flag, so the integer attribute never carries an empty value.
+	value="";
+	[[ -d "$currentFile" ]] || value=$(wc -l < "$currentFile"); # stdin input: wc prints just the number and never parses the file name as an option
+	value="${value//[[:space:]]/}"; # some wc implementations pad the number with spaces
+	# anything that is not a plain number means that wc did not run or failed
+	if [[ "$value" =~ ^[0-9]+$ ]]; then isNull="false"; else value="0"; isNull="true"; fi
+
+	send OUTPUT_ATTRIBUTE "$value" "$isNull";
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/mime_type Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides a single attribute: MIME type of given file.
+# It calls the tool file.
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { # sends the metadata of the single attribute of this streamlet
+ send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-mime_type}" "string" # name: the first alias if it is set (even if empty), otherwise "mime_type"; type: "string"
+ send WAITING_FOR_INPUT_ATTRIBUTES # sent after the metadata of the only attribute
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { # sends the output of file --mime-type for $currentFile, never flagged as null
+	value=$(file --preserve-date --brief --mime-type --dereference -- "$currentFile"); # "--": a file name starting with "-" must not be parsed as an option
+	send OUTPUT_ATTRIBUTE "$value" "false";
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/pdfinfo Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides PDF metadata of given files. It calls the tool pdfinfo.
+# With no options it returns just number of pages (or 0 if the file is not a PDF).
+# Specific attributes can be selected using options – e.g. --option 'attribute' 'Author'
+# List of available attributes can be obtained by directly calling the pdfinfo command on a PDF file.
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+	# Collects the requested pdfinfo fields from the options
+	# and sends metadata of one output attribute per field.
+	pdfFields=()
+	for (( i=0; i<${#optionNames[@]}; i++)); do
+		case "${optionNames[$i]}" in
+			attribute) pdfFields+=("${optionValues[$i]}");;
+			prefix) pdfPrefix="${optionValues[$i]}";;
+			*) echo "Unsupported option: ${optionNames[$i]}" >&2;;
+		esac
+	done
+
+	# no usable attribute option (the first field is missing or empty) → just the number of pages
+	[[ -n "$pdfFields" ]] || pdfFields=( "Pages" );
+
+	for (( i=0; i<${#pdfFields[@]}; i++)); do
+		# "Pages" is the only field declared as an integer; everything else is declared as a string
+		local type="string";
+		[[ "x${pdfFields[$i]}" != "xPages" ]] || type="integer";
+		send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[$i]-$pdfPrefix${pdfFields[$i]}}" "$type"
+	done
+
+	send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+	local pdfInfo pdfValid value isNull;
+	# pdfinfo is executed once per file (directories are skipped); all fields are then picked from its text output
+	[[ -d "$currentFile" ]] || pdfInfo="$(pdfinfo -isodates "$currentFile")";
+	pdfValid="$?";
+
+	for (( i=0; i<${#pdfFields[@]}; i++)); do
+		value="$(echo "$pdfInfo" | grep -P "^\Q${pdfFields[$i]}\E:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';)"; # the field name must not contain "\E"
+		# "Pages" is an integer, so a missing value becomes a null 0; other fields are null (and empty) only if pdfinfo failed
+		isNull="false";
+		if [[ "x${pdfFields[$i]}" == "xPages" ]]; then
+			if [[ "x$pdfValid" != "x0" || -z "$value" ]]; then value="0"; isNull="true"; fi
+		elif [[ "x$pdfValid" != "x0" ]]; then value=""; isNull="true";
+		fi
+		send OUTPUT_ATTRIBUTE "$value" "$isNull";
+	done
+
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/pdftotext Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides a single attribute: plain text content of given PDF file. It calls the tool pdftotext.
+# n.b. the plain text content must fit into memory and shell variable and command-line argument (it usually will)
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() { # sends the metadata of the single attribute of this streamlet
+ send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-pdftotext}" "string" # name: the first alias if it is set (even if empty), otherwise "pdftotext"; type: "string"
+ send WAITING_FOR_INPUT_ATTRIBUTES # sent after the metadata of the only attribute
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() { # sends the plain text of $currentFile, or an empty null value if pdftotext failed
+	value="$(set -o pipefail; pdftotext "$currentFile" - | tr -d \\f)"; # tr just removes page breaks; pipefail (limited to this subshell) lets a pdftotext failure reach $? instead of the status of tr
+	if [[ "x$?" == "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi
+	send OUTPUT_ATTRIBUTE "$value" "$isNull";
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/streamlet-examples/tesseract Sat Jan 18 20:09:34 2020 +0100
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This streamlet provides a single attribute: OCR recognized text of given image file. It calls the tool tesseract.
+# Languages can be specified by: --option "language" "eng" --option "language" "ces"
+
+
+. "$(dirname "$0")/streamlet-common.sh" # $0 is quoted so that a streamlet directory containing whitespace still resolves
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
+	# Joins all "language" options into a single "+lang1+lang2" string and sends the metadata of the single string attribute.
+	tesseractLanguage="";
+	for (( i=0; i<${#optionNames[@]}; i++)); do
+		case "${optionNames[$i]}" in
+			language) tesseractLanguage+="+${optionValues[$i]}";;
+			*) echo "Unsupported option: ${optionNames[$i]}" >&2;;
+		esac
+	done
+
+	send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string"
+	send WAITING_FOR_INPUT_ATTRIBUTES
+}
+
+processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
+	# Sends the OCR text of $currentFile (page breaks removed), or an empty null value if tesseract failed. The "-l <languages>" pair is passed only when a language was configured – two separately quoted expansions would hand tesseract two empty arguments otherwise.
+	value="$(cat "$currentFile" | tesseract stdin stdout ${tesseractLanguage:+-l "$tesseractLanguage"})";
+	if [[ "x$?" == "x0" ]]; then value="$(echo "$value" | tr -d \\f)"; isNull="false"; else value=""; isNull="true"; fi
+	send OUTPUT_ATTRIBUTE "$value" "$isNull";
+	send WAITING_FOR_INPUT_ATTRIBUTES;
+}
+
+initialize
+processMessages