streamlet-examples/tesseract
author František Kučera <franta-hg@frantovo.cz>
Thu, 30 Jan 2020 23:27:49 +0100
branchv_0
changeset 78 5a63bf594f53
parent 50 22ed5647b235
permissions -rwxr-xr-x
streamlet examples: xpath: support XInclude (like in relpipe-tr-xmltable)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
33
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
#!/bin/bash
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
# Relational pipes
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
#
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
# This program is free software: you can redistribute it and/or modify
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
# it under the terms of the GNU General Public License as published by
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
# the Free Software Foundation, version 3 of the License.
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
#
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
# This program is distributed in the hope that it will be useful,
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
# GNU General Public License for more details.
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
#
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
# You should have received a copy of the GNU General Public License
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
# along with this program. If not, see <http://www.gnu.org/licenses/>.
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
43
bfc7e5d541c2 streamlet examples: tesseract OCR
František Kučera <franta-hg@frantovo.cz>
parents: 42
diff changeset
    19
# This streamlet provides a single attribute: the OCR-recognized text of the given image file. It calls the tesseract tool.
bfc7e5d541c2 streamlet examples: tesseract OCR
František Kučera <franta-hg@frantovo.cz>
parents: 42
diff changeset
    20
# Languages can be specified by: --option "language" "eng" --option "language" "ces"
33
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
50
22ed5647b235 streamlets: include streamlet-common.sh from the directory where the actual streamlet resides (not where is the symlink, if any)
František Kučera <franta-hg@frantovo.cz>
parents: 49
diff changeset
    23
# Load the shared streamlet helpers from the directory of the real script file:
# realpath resolves "$0" first, so this also works when the streamlet is invoked through a symlink.
. "$(dirname "$(realpath "$0")")/streamlet-common.sh"
33
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message.
#
# Reads the parallel arrays optionNames / optionValues and collects every
# "language" option into the global tesseractLanguage, joined by "+"
# (e.g. "eng+ces"), which is the LANG[+LANG] form accepted by "tesseract -l".
# Any other option name is reported on stderr and otherwise ignored.
#
# Then announces one output attribute of type "string", named by the first
# output attribute alias if there is one, or "tesseract" otherwise, and sends
# WAITING_FOR_INPUT_ATTRIBUTES.
#
# Globals read:    optionNames, optionValues, outputAttributeAliases
# Globals written: tesseractLanguage
# NOTE(review): "send" and the arrays above are not defined in this file;
# presumably they come from streamlet-common.sh - confirm there.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# Keep the loop index out of the global scope shared with the caller.
	local i

	tesseractLanguage=""
	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		if [[ "${optionNames[$i]}" == "language" ]]; then
			# Skip empty values and put "+" only between languages,
			# so the result never starts or ends with a separator.
			if [[ -n "${optionValues[$i]}" ]]; then
				tesseractLanguage+="${tesseractLanguage:++}${optionValues[$i]}"
			fi
		else
			echo "Unsupported option: ${optionNames[$i]}" >&2
		fi
	done

	# "-" (not ":-") default: fall back to "tesseract" only when no alias was given at all.
	send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string"
	send WAITING_FOR_INPUT_ATTRIBUTES
}
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES message.
#
# Feeds the file named by $currentFile to "tesseract stdin stdout" and sends
# the recognized text as the single output attribute. If tesseract exits with
# a non-zero status, an empty value flagged as null ("true") is sent instead.
# Form feed characters are removed from the text before it is sent.
# Finally sends WAITING_FOR_INPUT_ATTRIBUTES.
#
# Globals read:    currentFile, tesseractLanguage
# Globals written: value, isNull
# NOTE(review): "send" and currentFile are not defined in this file;
# presumably they come from streamlet-common.sh - confirm there.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	# ${var:+-l "$var"} expands to the two words  -l <languages>  when a language
	# is configured and to nothing at all otherwise, so tesseract never receives
	# empty-string arguments.
	if value="$(tesseract stdin stdout ${tesseractLanguage:+-l "$tesseractLanguage"} < "$currentFile")"; then
		isNull="false"
	else
		value=""
		isNull="true"
	fi

	# printf instead of echo: the text is data and must not be parsed as echo options.
	# The command substitution also drops trailing newlines left once the form feed is gone.
	value="$(printf '%s\n' "$value" | tr -d '\f')"

	send OUTPUT_ATTRIBUTE "$value" "$isNull"
	send WAITING_FOR_INPUT_ATTRIBUTES
}
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    46
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
# Start-up step; "initialize" is not defined in this file.
# NOTE(review): presumably provided by streamlet-common.sh - confirm what it sets up.
initialize
f9cada1d46a4 streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    48
# Main step; "processMessages" is not defined in this file.
# NOTE(review): presumably the streamlet-common.sh loop that dispatches to the processMessage_* handlers above - confirm.
processMessages