#!/bin/bash

# Relational pipes
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Provenance: reconstructed from the removed ("-") side of the Mercurial diff
# dc5c210295d0 -> f466b4c7d9b1 of
# streamlet-examples/__relpipe_in_filesystem_script_tesseract
# (the new side of that diff is /dev/null, i.e. the file was deleted there).

# This streamlet provides a single attribute: OCR recognized text of given image file. It calls the tool tesseract.
# Languages can be specified by: --option "language" "eng" --option "language" "ces"

# Names used below but not defined in this script: send, initialize,
# processMessages, optionNames, optionValues, outputAttributeAliases, currentFile.
# NOTE(review): presumably all of them come from streamlet-common.sh, which is
# the only file sourced here -- confirm against that file.
. "$(dirname -- "$0")/streamlet-common.sh"

#######################################
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message: collects the
# requested OCR languages and announces the single output attribute.
# Globals:
#   optionNames, optionValues (read) - parallel arrays of option names/values
#   outputAttributeAliases (read)    - element 0 names the attribute;
#                                      "tesseract" is used when it is unset
#   tesseractLanguage (written)      - languages joined by "+", e.g. "eng+ces";
#                                      empty when no language option was given
# Outputs:
#   "Unsupported option: NAME" on stderr for every option other than "language".
#######################################
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	local i
	tesseractLanguage=""
	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		case "${optionNames[i]}" in
			language)
				# ${tesseractLanguage:++} yields "+" only when the list is
				# already non-empty, so no leading separator is produced.
				tesseractLanguage+="${tesseractLanguage:++}${optionValues[i]}"
				;;
			*)
				echo "Unsupported option: ${optionNames[i]}" >&2
				;;
		esac
	done

	send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string"
	send WAITING_FOR_INPUT_ATTRIBUTES
}

#######################################
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES message: runs tesseract on
# $currentFile and sends the recognized text as the attribute value.
# Globals:
#   currentFile (read)       - path of the image fed to tesseract on stdin
#   tesseractLanguage (read) - passed as "-l LANGS" when non-empty
#   value, isNull (written)  - NOTE(review): kept global as in the original;
#                              streamlet-common.sh is not visible here, so
#                              whether it reads them is unconfirmed.
# Sends:
#   OUTPUT_ATTRIBUTE <text> false  when tesseract exits with status 0
#   OUTPUT_ATTRIBUTE ""     true   otherwise (including an unreadable file)
#######################################
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	# Build the optional "-l LANGS" pair as an array so that nothing at all
	# (rather than two empty-string arguments) is passed when no language is set.
	local -a languageArgs=()
	if [[ -n "$tesseractLanguage" ]]; then
		languageArgs=(-l "$tesseractLanguage")
	fi

	if value="$(tesseract stdin stdout "${languageArgs[@]}" < "$currentFile")"; then
		isNull="false"
		# Drop form feed characters from the OCR output; printf is used instead
		# of echo so that text such as "-n" is not taken for an echo option.
		# The command substitution also trims the trailing newlines.
		value="$(printf '%s\n' "$value" | tr -d '\f')"
	else
		value=""
		isNull="true"
	fi

	send OUTPUT_ATTRIBUTE "$value" "$isNull"
	send WAITING_FOR_INPUT_ATTRIBUTES
}

initialize
processMessages