equal
deleted
inserted
replaced
43 --streamlet tesseract \ |
43 --streamlet tesseract \ |
44 --option 'language' 'eng' \ |
44 --option 'language' 'eng' \ |
45 --as 'ocr_text' \ |
45 --as 'ocr_text' \ |
46 --streamlet pdftotext --as 'pdf_text' \ |
46 --streamlet pdftotext --as 'pdf_text' \ |
47 | relpipe-tr-awk \ |
47 | relpipe-tr-awk \ |
48 --relation filesystem \ |
48 --relation 'filesystem' \ |
49 --where 'path ~ /\.sh$/ || url ~ /alt2xml\.globalcode\.info/ || ocr_text ~ /GNU/ || pdf_text ~ /Sane/' \ |
49 --where 'path ~ /\.sh$/ || url ~ /alt2xml\.globalcode\.info/ || ocr_text ~ /GNU/ || pdf_text ~ /Sane/' \ |
50 | relpipe-tr-cut filesystem 'path|url|width|height|page_count|lines_count' \ |
50 | relpipe-tr-cut --relation 'filesystem' --attribute 'path|url|width|height|page_count|lines_count' \ |
51 | relpipe-out-tabular |
51 | relpipe-out-tabular |
52 |
52 |
53 # if too wide, add: | less -RSi]]></m:pre> |
53 # if too wide, add: | less -RSi]]></m:pre> |
54 |
54 |
55 <p> |
55 <p> |