equal
deleted
inserted
replaced
|
#!/bin/bash

# XHTML namespace URI. It is passed to the xpath streamlet as the "xmlns_h"
# option (binding the "h" prefix used in the XPath expressions) and to the
# SQL query as its single parameter.
XMLNS_H="http://www.w3.org/1999/xhtml"

# If we set xmlns_h="…", we can omit: --option xmlns_h "$XMLNS_H"
# because XML namespaces can be provided either as an option or as an environment variable.
# Options have precedence.
|
8 |
|
# Lists every path below the current working directory (including "." itself).
# Outputs: the paths on stdout, each terminated by a NUL byte, so names with
#          spaces or newlines survive the pipe.
findFiles() {
	# Explicit "." starting point: GNU find assumes it when omitted, but other
	# find implementations (e.g. BSD/macOS) require a path operand.
	find . -print0;
}
|
12 |
|
# Runs relpipe-in-filesystem with the xpath streamlet to collect per-file
# attributes.
# Globals:   XMLNS_H (read) - namespace URI passed as the "xmlns_h" option.
# Inputs:    stdin is handed to relpipe-in-filesystem unchanged (in this
#            script it is the NUL-separated path list from findFiles).
# Outputs:   whatever relpipe-in-filesystem writes to stdout.
# Requested attributes (output names given by --as):
#   name       - the file name (--file name)
#   valid_xml  - XPath '.' evaluated in boolean mode
#   root_xmlns - namespace-uri()
#   title      - /h:html/h:head/h:title
#   h1_count, h2_count, h3_count - count() of the respective heading elements
fetchAttributes() {
	relpipe-in-filesystem \
		--parallel 8 \
		--file name \
		--streamlet xpath \
			--option xmlns_h "$XMLNS_H" \
			--option attribute '.' --option mode boolean --as 'valid_xml' \
			--option attribute 'namespace-uri()' --as 'root_xmlns' \
			--option attribute '/h:html/h:head/h:title' --as 'title' \
			--option attribute 'count(//h:h1)' --as 'h1_count' \
			--option attribute 'count(//h:h2)' --as 'h2_count' \
			--option attribute 'count(//h:h3)' --as 'h3_count'
}
|
26 |
|
# Runs relpipe-tr-sql to build the relation "pages" from the "filesystem"
# relation read on stdin.
# Globals:   XMLNS_H (read) - bound to the single "?" placeholder in the query.
# Inputs:    stdin is handed to relpipe-tr-sql unchanged.
# Outputs:   whatever relpipe-tr-sql writes to stdout.
# The query keeps rows whose root_xmlns equals the parameter, orders them by
# the sum of the three heading counts (descending) and keeps the first 5.
filterAndOrder() {
	relpipe-tr-sql \
		--relation "pages" \
			"SELECT
				name,
				title,
				h1_count,
				h2_count,
				h3_count
			FROM filesystem WHERE root_xmlns = ?
			ORDER BY h1_count + h2_count + h3_count DESC
			LIMIT 5" \
			--type-cast 'h1_count' integer \
			--type-cast 'h2_count' integer \
			--type-cast 'h3_count' integer \
			--parameter "$XMLNS_H";
}
|
44 |
|
# Main pipeline: list paths, extract the XPath attributes, keep the five
# matching pages with the most headings, and pass the result to
# relpipe-out-gui with the window title "Pages and titles".
findFiles | fetchAttributes | filterAndOrder | relpipe-out-gui -title "Pages and titles"