author | František Kučera <franta-hg@frantovo.cz> |
Thu, 30 Jan 2020 23:27:49 +0100 | |
branch | v_0 |
changeset 78 | 5a63bf594f53 |
parent 50 | 22ed5647b235 |
permissions | -rwxr-xr-x |
33
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
#!/bin/bash |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
|
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
# Relational pipes |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
4 |
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
5 |
# |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
# This program is free software: you can redistribute it and/or modify |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
7 |
# it under the terms of the GNU General Public License as published by |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
8 |
# the Free Software Foundation, version 3 of the License. |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
9 |
# |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
10 |
# This program is distributed in the hope that it will be useful, |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
11 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
13 |
# GNU General Public License for more details. |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
14 |
# |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
# You should have received a copy of the GNU General Public License |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
17 |
|
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
18 |
|
43
bfc7e5d541c2
streamlet examples: tesseract OCR
František Kučera <franta-hg@frantovo.cz>
parents:
42
diff
changeset
|
19 |
# This streamlet provides a single attribute: the OCR-recognized text of the given image file. It calls the tesseract tool. |
bfc7e5d541c2
streamlet examples: tesseract OCR
František Kučera <franta-hg@frantovo.cz>
parents:
42
diff
changeset
|
20 |
# Languages can be specified by: --option "language" "eng" --option "language" "ces" |
33
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
21 |
|
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
22 |
|
50
22ed5647b235
streamlets: include streamlet-common.sh from the directory where the actual streamlet resides (not where is the symlink, if any)
František Kučera <franta-hg@frantovo.cz>
parents:
49
diff
changeset
|
23 |
# Source streamlet-common.sh from the directory of the real script file: realpath resolves $0 first, so a symlinked streamlet still finds it.
. "$(dirname "$(realpath "$0")")/streamlet-common.sh" |
33
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
24 |
|
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
25 |
# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message: collects the
# OCR languages from the streamlet options and announces the single output
# attribute of type "string".
#
# Reads (globals; presumably populated by streamlet-common.sh — TODO confirm):
#   optionNames, optionValues   parallel arrays of option names and values
#   outputAttributeAliases      optional alias for the output attribute
# Writes (global, read later when tesseract is invoked):
#   tesseractLanguage   all "language" option values joined by "+"
#                       (e.g. "eng+ces"); empty when none was given
# Any option other than "language" is reported on stderr and ignored.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# Keep the loop index local so it cannot clobber a counter of the caller.
	local i

	tesseractLanguage=""
	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		if [[ "${optionNames[$i]}" == "language" ]]; then
			# Put the "+" separator only between languages, so the result
			# matches tesseract's documented LANG[+LANG] form.
			if [[ -n "$tesseractLanguage" ]]; then
				tesseractLanguage+="+"
			fi
			tesseractLanguage+="${optionValues[$i]}"
		else
			echo "Unsupported option: ${optionNames[$i]}" >&2
		fi
	done

	# The attribute is named by the first alias, or "tesseract" when no alias is set.
	send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string"
	send WAITING_FOR_INPUT_ATTRIBUTES
}
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
38 |
|
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
39 |
# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES message: runs tesseract OCR on
# the file named by $currentFile and sends the recognized text as the
# attribute value. If tesseract (or opening the file) fails, an empty value
# flagged as NULL is sent instead.
#
# Reads (globals):
#   currentFile         path of the image to recognize
#   tesseractLanguage   "+"-joined language list; may be empty or unset
# Writes (globals, as before): value, isNull
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	# Pass "-l <languages>" only when a language was configured. Quoted
	# empty expansions would hand tesseract two empty-string arguments,
	# which it would treat as configuration file names.
	local languageArgs=()
	if [[ -n "${tesseractLanguage:-}" ]]; then
		languageArgs=(-l "$tesseractLanguage")
	fi

	# The image is fed through stdin by redirection; the exit status of the
	# assignment is the exit status of tesseract (or of the failed redirect).
	# The ${arr[@]+...} form keeps an empty array safe under "set -u" on older bash.
	if value="$(tesseract stdin stdout ${languageArgs[@]+"${languageArgs[@]}"} < "$currentFile")"; then
		isNull="false"
	else
		value=""
		isNull="true"
	fi

	# Drop the form feed characters from the tesseract output. printf is
	# used instead of echo so text that looks like an echo option ("-n",
	# "-e") is kept intact; the command substitution also strips the
	# trailing newlines exposed once the final form feed is gone.
	value="$(printf '%s' "$value" | tr -d '\f')"

	send OUTPUT_ATTRIBUTE "$value" "$isNull"
	send WAITING_FOR_INPUT_ATTRIBUTES
}
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
46 |
|
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
47 |
# NOTE(review): initialize is not defined in this file — presumably provided by the sourced streamlet-common.sh; confirm there.
initialize |
f9cada1d46a4
streamlet examples: common functions + inode, lines_count, mime_type
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
48 |
# NOTE(review): processMessages is not defined in this file — presumably the message loop from streamlet-common.sh that dispatches to the processMessage_* handlers; confirm there.
processMessages