|
1 #!/bin/bash |
|
2 |
|
3 # Relational pipes |
|
4 # Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
|
5 # |
|
6 # This program is free software: you can redistribute it and/or modify |
|
7 # it under the terms of the GNU General Public License as published by |
|
8 # the Free Software Foundation, version 3 of the License. |
|
9 # |
|
10 # This program is distributed in the hope that it will be useful, |
|
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 # GNU General Public License for more details. |
|
14 # |
|
15 # You should have received a copy of the GNU General Public License |
|
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
17 |
|
18 |
|
# This streamlet provides a single attribute: the OCR-recognized text of the given image file. It calls the tool tesseract.
|
20 # Languages can be specified by: --option "language" "eng" --option "language" "ces" |
|
21 |
|
22 |
|
# Load streamlet-common.sh from the directory of this script.
# "$0" is quoted (and guarded by "--") so that a script path containing
# whitespace, glob characters or a leading dash is passed to dirname intact.
. "$(dirname -- "$0")/streamlet-common.sh"
|
24 |
|
# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message.
#
# Concatenates the value of every "language" option into the global
# tesseractLanguage, each value prefixed with "+" (e.g. "+eng+ces"); the
# variable is left empty when no language option was given. Any other option
# name is reported on stderr and otherwise ignored. Afterwards the single
# output attribute is declared and input attributes are requested.
#
# Globals read:    optionNames, optionValues, outputAttributeAliases
# Globals written: tesseractLanguage
# Calls:           send (not defined in this file; presumably provided by
#                  streamlet-common.sh)
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	# The loop counter is local so it cannot clobber a variable of the caller.
	local i

	tesseractLanguage=""
	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		if [[ "${optionNames[$i]}" == "language" ]]; then
			tesseractLanguage+="+${optionValues[$i]}"
		else
			printf '%s\n' "Unsupported option: ${optionNames[$i]}" >&2
		fi
	done

	# The attribute is named after the first alias when one is set,
	# otherwise "tesseract"; its type is "string".
	send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string"
	send WAITING_FOR_INPUT_ATTRIBUTES
}
|
38 |
|
# Handles the WAITING_FOR_OUTPUT_ATTRIBUTES message.
#
# Feeds the file named by $currentFile to "tesseract stdin stdout" on its
# standard input and sends the recognized text as the output attribute.
# When tesseract exits with a non-zero status, an empty value flagged as
# null ("true") is sent instead. Input attributes are requested afterwards.
#
# Globals read: currentFile, tesseractLanguage
# Calls:        tesseract, tr, send (send is not defined in this file;
#               presumably provided by streamlet-common.sh)
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	local value isNull ocrText
	local ocrStatus=0
	local -a ocrCommand=(tesseract stdin stdout)

	# Add "-l <languages>" only when languages were configured; quoting two
	# empty expansions would hand tesseract two empty-string arguments.
	if [[ -n "${tesseractLanguage-}" ]]; then
		ocrCommand+=(-l "$tesseractLanguage")
	fi

	ocrText="$("${ocrCommand[@]}" < "$currentFile")" || ocrStatus=$?

	if (( ocrStatus == 0 )); then
		isNull="false"
		# Delete form feed characters from the output. printf is used instead
		# of echo so that text such as "-n" is not taken for an echo option;
		# the command substitution drops the trailing newlines left behind.
		value="$(printf '%s\n' "$ocrText" | tr -d '\f')"
	else
		value=""
		isNull="true"
	fi

	send OUTPUT_ATTRIBUTE "$value" "$isNull"
	send WAITING_FOR_INPUT_ATTRIBUTES
}
|
46 |
|
# Entry point: run the two top-level steps in order. Neither function is
# defined in this file; presumably both come from streamlet-common.sh and
# dispatch to the processMessage_* handlers above (confirm against that file).
initialize
processMessages