equal
deleted
inserted
replaced
1 #!/bin/bash |
|
2 |
|
3 # Relational pipes |
|
4 # Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
|
5 # |
|
6 # This program is free software: you can redistribute it and/or modify |
|
7 # it under the terms of the GNU General Public License as published by |
|
8 # the Free Software Foundation, version 3 of the License. |
|
9 # |
|
10 # This program is distributed in the hope that it will be useful, |
|
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 # GNU General Public License for more details. |
|
14 # |
|
15 # You should have received a copy of the GNU General Public License |
|
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
17 |
|
18 |
|
19 # This streamlet provides a single attribute: the plain-text content of the given PDF file, extracted by calling the pdftotext tool. |
|
20 # N.B. the plain-text content must fit into memory, into a shell variable, and into a single command-line argument (it usually will). |
|
21 |
|
22 |
|
# Load the shared streamlet helpers that live next to this script.
# "$0" is quoted (and preceded by --) so that a script path containing
# whitespace, glob characters or a leading dash is passed to dirname intact.
. "$(dirname -- "$0")/streamlet-common.sh"
|
24 |
|
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA message.
# Declares the single output attribute of type "string" and then asks for
# the input attributes.
# Globals:   outputAttributeAliases (read) - element 0, when set, is used as
#            the attribute name; otherwise the name defaults to "pdftotext".
# Outputs:   two messages emitted through send.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA() {
  # "-" (not ":-") on purpose: an alias that is set but empty is kept as is.
  local pdftotextAttributeName="${outputAttributeAliases[0]-pdftotext}"

  send OUTPUT_ATTRIBUTE_METADATA "$pdftotextAttributeName" string
  send WAITING_FOR_INPUT_ATTRIBUTES
}
|
29 |
|
# Handler for the WAITING_FOR_OUTPUT_ATTRIBUTES message.
# Runs pdftotext on "$currentFile" and sends the extracted text as the
# single output attribute; when pdftotext fails, an empty value flagged as
# null is sent instead. Afterwards it asks for the next input attributes.
# Globals:   currentFile (read) - path handed to pdftotext.
# Outputs:   two messages emitted through send:
#            OUTPUT_ATTRIBUTE <text> <isNull> and WAITING_FOR_INPUT_ATTRIBUTES.
processMessage_WAITING_FOR_OUTPUT_ATTRIBUTES() {
  local value isNull

  # pipefail is switched on only inside the command-substitution subshell:
  # without it the pipeline status is that of tr, so a pdftotext failure
  # would go unnoticed and the attribute would never be reported as null.
  # tr merely removes the form-feed characters (page breaks).
  if value="$(set -o pipefail; pdftotext "$currentFile" - | tr -d '\f')"; then
    isNull="false"
  else
    value=""
    isNull="true"
  fi

  send OUTPUT_ATTRIBUTE "$value" "$isNull"
  send WAITING_FOR_INPUT_ATTRIBUTES
}
|
36 |
|
# Entry point: run the start-up routine, then enter the message loop.
# NOTE(review): initialize and processMessages are not defined in this file;
# presumably they come from the sourced streamlet-common.sh - confirm there.
initialize
processMessages
|