#!/bin/bash

# Relational pipes
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Provenance: first added as
#   streamlet-examples/__relpipe_in_filesystem_script_tesseract
# in Mercurial changeset bfc7e5d541c28cf1a0595222e61df701205aecbf
# (parent f1bbcf616269b8573f7602a0e50eafcc90959a9e) by František Kučera,
# Fri Jan 17 14:07:33 2020 +0100, "streamlet examples: tesseract OCR".

# This streamlet provides a single attribute: OCR recognized text of given image file. It calls the tool tesseract.
# Languages can be specified by: --option "language" "eng" --option "language" "ces"


# Shared streamlet helpers are loaded from the directory of this script.
# The names send, initialize, processMessages, optionNames, optionValues,
# outputAttributeAliases and currentFile are not defined in this file;
# presumably they come from streamlet-common.sh (verify against that file).
. "$(dirname "$0")/streamlet-common.sh"

#######################################
# Collects the tesseract languages from the streamlet options and announces
# the single output attribute.
# Globals:
#   optionNames, optionValues  (read)    parallel arrays of option name/value
#   outputAttributeAliases     (read)    element 0 overrides the attribute name
#   tesseractLanguage          (written) languages joined by "+", e.g. "eng+ces";
#                                        empty when no language was given
# Outputs:
#   One "Unsupported option: NAME" line on stderr per option other than "language".
#######################################
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
	local i
	tesseractLanguage=""
	for (( i = 0; i < ${#optionNames[@]}; i++ )); do
		if [[ "${optionNames[$i]}" == "language" ]]; then
			# Join with "+" only between entries, giving tesseract's LANG[+LANG] form.
			if [[ -n "$tesseractLanguage" ]]; then
				tesseractLanguage+="+"
			fi
			tesseractLanguage+="${optionValues[$i]}"
		else
			printf 'Unsupported option: %s\n' "${optionNames[$i]}" >&2
		fi
	done

	# The attribute is named "tesseract" unless an alias is set.
	send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}" "string"
	send WAITING_FOR_INPUT_ATTRIBUTES
}

#######################################
# Runs tesseract on the current file and sends the recognized text as the
# attribute value. When tesseract fails, an empty value flagged as null is
# sent instead.
# Globals:
#   currentFile        (read) path of the image fed to tesseract on stdin
#   tesseractLanguage  (read) language list; "-l" is passed only when non-empty
#######################################
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
	local value isNull
	local -a languageArguments=()

	# An array keeps "-l LANGS" as two words and expands to nothing at all
	# when no language is configured (no stray empty arguments).
	if [[ -n "$tesseractLanguage" ]]; then
		languageArguments=(-l "$tesseractLanguage")
	fi

	if value="$(tesseract stdin stdout "${languageArguments[@]}" < "$currentFile")"; then
		isNull="false"
		# Drop form feed characters from the output. printf (not echo) keeps
		# texts such as "-n" intact; the command substitution trims trailing
		# newlines.
		value="$(printf '%s' "$value" | tr -d '\f')"
	else
		value=""
		isNull="true"
	fi

	send OUTPUT_ATTRIBUTE "$value" "$isNull"
	send WAITING_FOR_INPUT_ATTRIBUTES
}

initialize
processMessages