streamlet-examples/__relpipe_in_filesystem_script_tesseract
branchv_0
changeset 43 bfc7e5d541c2
parent 42 f1bbcf616269
equal deleted inserted replaced
42:f1bbcf616269 43:bfc7e5d541c2
       
     1 #!/bin/bash
       
     2 
       
     3 # Relational pipes
       
     4 # Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
       
     5 #
       
     6 # This program is free software: you can redistribute it and/or modify
       
     7 # it under the terms of the GNU General Public License as published by
       
     8 # the Free Software Foundation, version 3 of the License.
       
     9 #
       
    10 # This program is distributed in the hope that it will be useful,
       
    11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
       
    13 # GNU General Public License for more details.
       
    14 #
       
    15 # You should have received a copy of the GNU General Public License
       
    16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
       
    17 
       
    18 
       
     19 # This streamlet provides a single attribute: the OCR-recognized text of the given image file. It calls the tool tesseract.
       
    20 # Languages can be specified by: --option "language" "eng" --option "language" "ces"
       
    21 
       
    22 
       
    23 . "$(dirname $0)/streamlet-common.sh"
       
    24 
       
    25 processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
       
    26 	tesseractLanguage="";
       
    27 	for (( i=0; i<${#optionNames[@]}; i++)); do
       
    28 		if [[ "x${optionNames[$i]}" == "xlanguage" ]]; then
       
    29 			tesseractLanguage+="+${optionValues[$i]}";
       
    30 		else
       
    31 			echo "Unsupported option: ${optionNames[$i]}" >&2
       
    32 		fi
       
    33 	done
       
    34 
       
    35 	send OUTPUT_ATTRIBUTE_METADATA "${outputAttributeAliases[0]-tesseract}"    "string"
       
    36 	send WAITING_FOR_INPUT_ATTRIBUTES
       
    37 }
       
    38 
       
    39 processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
       
    40 	value="$(cat "$currentFile" | tesseract stdin stdout "${tesseractLanguage:+-l}" "${tesseractLanguage}")";
       
    41 	if   [[ "x$?" == "x0" ]]; then isNull="false"; else value=""; isNull="true"; fi
       
    42 	value="$(echo "$value" | tr -d \\f)"
       
    43 	send OUTPUT_ATTRIBUTE "$value"    "$isNull";
       
    44 	send WAITING_FOR_INPUT_ATTRIBUTES;
       
    45 }
       
    46 
       
    47 initialize
       
    48 processMessages