--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/relpipe-data/examples/xhtml-filesystem-xpath.sh Mon Feb 03 22:10:07 2020 +0100
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+XMLNS_H="http://www.w3.org/1999/xhtml" # XHTML namespace URI: passed below as the xmlns_h option (h: prefix in the XPath expressions) and as the SQL parameter
+
+# If we set xmlns_h="…" in the environment, we can omit: --option xmlns_h "$XMLNS_H"
+# because XML namespaces can be provided either as an option or as an environment variable.
+# Options take precedence.
+
+findFiles() { # Print every path under the current directory (including "." itself), NUL-terminated, on stdout.
+ find . -print0 # explicit "." start point: POSIX find requires a path operand; GNU find merely defaults to it
+}
+
+fetchAttributes() { # Runs relpipe-in-filesystem on stdin (the findFiles output in this script's pipeline).
+ # Requested attributes: the file name plus six XPath results; $XMLNS_H is passed as the xmlns_h option (h: prefix).
+ local -a xpathOptions=(
+ --option xmlns_h "$XMLNS_H"
+ --option attribute '.' --option mode boolean --as 'valid_xml'
+ --option attribute 'namespace-uri()' --as 'root_xmlns'
+ --option attribute '/h:html/h:head/h:title' --as 'title'
+ --option attribute 'count(//h:h1)' --as 'h1_count'
+ --option attribute 'count(//h:h2)' --as 'h2_count'
+ --option attribute 'count(//h:h3)' --as 'h3_count'
+ )
+ relpipe-in-filesystem --parallel 8 --file name --streamlet xpath "${xpathOptions[@]}"
+}
+
+filterAndOrder() { # Runs relpipe-tr-sql on stdin: keeps rows whose root_xmlns equals $XMLNS_H, top 5 by heading count.
+ local topPagesQuery="SELECT
+ name,
+ title,
+ h1_count,
+ h2_count,
+ h3_count
+ FROM filesystem WHERE root_xmlns = ?
+ ORDER BY h1_count + h2_count + h3_count DESC
+ LIMIT 5"
+ # The single "?" placeholder in the query is filled by --parameter; the three counts are cast to integer.
+ relpipe-tr-sql --relation "pages" "$topPagesQuery" \
+ --type-cast 'h1_count' integer \
+ --type-cast 'h2_count' integer \
+ --type-cast 'h3_count' integer \
+ --parameter "$XMLNS_H";
+}
+
+findFiles | fetchAttributes | filterAndOrder | relpipe-out-gui -title "Pages and titles" # list files -> extract XPath attributes -> SQL filter/top 5 -> hand the result to relpipe-out-gui