XMLTable, SQL, v0.13 v_0 relpipe-v0.13
authorFrantišek Kučera <franta-hg@frantovo.cz>
Wed, 31 Jul 2019 16:01:34 +0200
branchv_0
changeset 264 d39cfc926f95
parent 263 8bf13358a50a
child 265 40ccccecdaa9
XMLTable, SQL, v0.13
relpipe-data/download.xml
relpipe-data/examples-in-xmltable-libvirt.xml
relpipe-data/examples-in-xmltable-tr-sql-mercurial-hg.xml
relpipe-data/examples/mercurial-hg-xmltable-sql.sh
relpipe-data/examples/mercurial-hg-xmltable-sql.txt
relpipe-data/examples/release-v0.13.sh
relpipe-data/implementation.xml
relpipe-data/release-v0.13.xml
relpipe-data/roadmap.xml
--- a/relpipe-data/download.xml	Thu Jul 25 22:16:12 2019 +0200
+++ b/relpipe-data/download.xml	Wed Jul 31 16:01:34 2019 +0200
@@ -40,6 +40,7 @@
 hg clone https://hg.globalcode.info/relpipe/relpipe-tr-guile.cpp;
 hg clone https://hg.globalcode.info/relpipe/relpipe-tr-python.cpp;
 hg clone https://hg.globalcode.info/relpipe/relpipe-tr-sed.cpp;
+hg clone https://hg.globalcode.info/relpipe/relpipe-tr-sql.cpp;
 hg clone https://hg.globalcode.info/relpipe/relpipe-tr-validator.cpp;
 hg clone https://hg.globalcode.info/relpipe/relpipe-web;]]></pre>
 	
--- a/relpipe-data/examples-in-xmltable-libvirt.xml	Thu Jul 25 22:16:12 2019 +0200
+++ b/relpipe-data/examples-in-xmltable-libvirt.xml	Wed Jul 31 16:01:34 2019 +0200
@@ -11,12 +11,6 @@
 		<p>
 			<a href="https://libvirt.org/">Libvirt</a> is a popular API/tool for managing virtual machines (KVM/Qemu, LXC etc.) and stores its configuration in XML files.
 			Thanks to the tool <code>relpipe-in-xmltable</code> we can get aggregated overview of our virtual machines.
-			This tool does similar job like the <a href="https://www.postgresql.org/docs/current/functions-xml.html">xmltable</a> function known from SQL.
-			It uses the <a href="https://www.w3.org/TR/xpath/all/">XPath</a> language for selecting parts of the input XML – one XPath expression points to record nodes
-			and one or more XPath expressions point to attribute nodes/values relatively to particular record node.
-			Our tool is able to produce one or more relations from a single XML input.
-			The input is parsed at once and converted to DOM in memory i.e. no streaming – thus processing of huge XML files requires appropriate amounts of RAM, on the other hand: 
-			our expression can access whole XML document and pick values not only from currently processed record node.
 		</p>
 		
 		
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/relpipe-data/examples-in-xmltable-tr-sql-mercurial-hg.xml	Wed Jul 31 16:01:34 2019 +0200
@@ -0,0 +1,33 @@
+<stránka
+	xmlns="https://trac.frantovo.cz/xml-web-generator/wiki/xmlns/strana"
+	xmlns:m="https://trac.frantovo.cz/xml-web-generator/wiki/xmlns/makro">
+	
+	<nadpis>Reading Mercurial history using XMLTable and SQL</nadpis>
+	<perex>filtering the commit log from hg VCS</perex>
+	<m:pořadí-příkladu>02900</m:pořadí-příkladu>
+
+	<text xmlns="http://www.w3.org/1999/xhtml">
+		
+		<p>
+			The <a href="https://www.mercurial-scm.org/">Mercurial</a> version control system 
+			can provide the repository commit log in a machine readable format – XML.
+			We will transform this XML to relations using the <code>relpipe-in-xmltable</code>
+			and then filter records using the SQL language in the <code>relpipe-tr-sql</code> tool.
+			Everything on-the-fly, in a single shell pipeline. Of course, we can cache intermediate products in a file if needed.
+		</p>
+		
+		<m:pre jazyk="bash" src="examples/mercurial-hg-xmltable-sql.sh"/>
+
+		<p>This script will generate several lovely relations:</p>
+
+		<m:pre jazyk="text" src="examples/mercurial-hg-xmltable-sql.txt"/>
+		
+		<p>
+			Using different output filter than <code>relpipe-out-tabular</code>,
+			we can serialize the relational output into other formats (XML, CSV, Recfile, ODS, ASN.1 BER etc.),
+			display them in a GUI or execute some shell command for each record.
+		</p>
+		
+	</text>
+
+</stránka>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/relpipe-data/examples/mercurial-hg-xmltable-sql.sh	Wed Jul 31 16:01:34 2019 +0200
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+relpipe-in-hglog() {
+	# on repositories with long history 
+	# it is good to process just a subset of the log using hg log parameters
+	hg log --template xml \
+		| relpipe-in-xmltable \
+			--relation hg_log \
+				--records "/log/logentry" \
+				--attribute "revision" integer "@revision" \
+				--attribute "date" string "date" \
+				--attribute "hash" string "@node" \
+				--attribute "hash_short" string "substring(@node,1,12)" \
+				--attribute "branch" string "branch" \
+				--attribute "author_name" string "author" \
+				--attribute "message" string "msg"
+				# --attribute "author_email" string "author/@email" \
+}
+
+relpipe-in-hgtags() {
+	hg tags \
+		| sed -E 's/([^ \s]+)\s+([0-9]+):(.*)/\1\n\2\n\3/g' \
+		| tr \\n \\0 \
+		| relpipe-in-cli \
+			generate-from-stdin hg_tags 3 \
+				tag string \
+				revision integer \
+				hash_short string
+}
+
+
+# just view everything:
+# (relpipe-in-hglog; relpipe-in-hgtags) | relpipe-out-tabular | less -RSi
+
+
+# filter and JOIN data, do some statistics:
+(relpipe-in-hglog; relpipe-in-hgtags) \
+	| relpipe-tr-sql \
+		--relation "tags" \
+			"SELECT
+				t.*,
+				l.date
+			FROM hg_tags AS t
+			JOIN hg_log AS l USING (hash_short)" \
+		--relation "filtered_log" \
+			"SELECT 
+				revision, 
+				date, 
+				hash_short,
+				branch, 
+				author_name
+			FROM hg_log
+			ORDER BY revision DESC
+			LIMIT ?" \
+			--parameter 10 \
+		--relation "author_statistics" \
+			"SELECT
+				author_name,
+				branch,
+				count(*) AS count,
+				min(date) AS first_commit,
+				max(date) AS last_commit
+				-- TODO: user proper data type for date
+			FROM hg_log
+			GROUP BY author_name, branch
+			ORDER BY count" \
+	| relpipe-out-tabular
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/relpipe-data/examples/mercurial-hg-xmltable-sql.txt	Wed Jul 31 16:01:34 2019 +0200
@@ -0,0 +1,42 @@
+tags:
+ ╭───────────────┬───────────────────┬─────────────────────┬───────────────────────────╮
+ │ tag  (string) │ revision (string) │ hash_short (string) │ date             (string) │
+ ├───────────────┼───────────────────┼─────────────────────┼───────────────────────────┤
+ │ tip           │ 263               │ 8bf13358a50a        │ 2019-07-25T22:16:12+02:00 │
+ │ relpipe-v0.12 │ 258               │ 2868d772c27e        │ 2019-05-28T21:18:20+02:00 │
+ │ relpipe-v0.11 │ 256               │ 822ffd23d679        │ 2019-04-09T17:14:05+02:00 │
+ │ relpipe-v0.10 │ 250               │ d16336d1c61f        │ 2019-02-20T11:46:10+01:00 │
+ │ relpipe-v0.9  │ 241               │ f71d300205b7        │ 2019-01-18T21:34:58+01:00 │
+ │ relpipe-v0.8  │ 219               │ a94eb371f77e        │ 2018-12-20T01:42:36+01:00 │
+ │ v0.4          │ 114               │ 9302cf6856ea        │ 2012-07-06T17:04:13+02:00 │
+ │ v0.3          │ 78                │ 7e478bfa5694        │ 2012-01-08T17:26:24+01:00 │
+ │ v0.2          │ 36                │ 5be21d1ef5f8        │ 2011-08-23T20:05:56+02:00 │
+ │ v0.1          │ 19                │ b81b96475fe0        │ 2011-04-30T20:20:54+02:00 │
+ ╰───────────────┴───────────────────┴─────────────────────┴───────────────────────────╯
+Record count: 10
+
+filtered_log:
+ ╭───────────────────┬───────────────────────────┬─────────────────────┬─────────────────┬──────────────────────╮
+ │ revision (string) │ date             (string) │ hash_short (string) │ branch (string) │ author_name (string) │
+ ├───────────────────┼───────────────────────────┼─────────────────────┼─────────────────┼──────────────────────┤
+ │ 263               │ 2019-07-25T22:16:12+02:00 │ 8bf13358a50a        │ v_0             │ František Kučera     │
+ │ 262               │ 2019-07-24T14:18:42+02:00 │ 846510a73535        │ v_0             │ František Kučera     │
+ │ 261               │ 2019-05-28T22:19:59+02:00 │ ffaf18970f2b        │ v_0             │ František Kučera     │
+ │ 260               │ 2019-05-28T22:19:45+02:00 │ 66dffc56bd43        │ v_0             │ František Kučera     │
+ │ 259               │ 2019-05-28T21:52:15+02:00 │ 13a521e9d34d        │ v_0             │ František Kučera     │
+ │ 258               │ 2019-05-28T21:18:20+02:00 │ 2868d772c27e        │ v_0             │ František Kučera     │
+ │ 257               │ 2019-04-09T22:53:40+02:00 │ a39066264509        │ v_0             │ František Kučera     │
+ │ 256               │ 2019-04-09T17:14:05+02:00 │ 822ffd23d679        │ v_0             │ František Kučera     │
+ │ 255               │ 2019-04-08T13:37:35+02:00 │ 94b533007e77        │ v_0             │ František Kučera     │
+ │ 254               │ 2019-04-08T11:56:24+02:00 │ 23247d93a012        │ v_0             │ František Kučera     │
+ ╰───────────────────┴───────────────────────────┴─────────────────────┴─────────────────┴──────────────────────╯
+Record count: 10
+
+author_statistics:
+ ╭──────────────────────┬─────────────────┬────────────────┬───────────────────────────┬───────────────────────────╮
+ │ author_name (string) │ branch (string) │ count (string) │ first_commit     (string) │ last_commit      (string) │
+ ├──────────────────────┼─────────────────┼────────────────┼───────────────────────────┼───────────────────────────┤
+ │ František Kučera     │ v_0             │ 128            │ 2018-11-21T23:52:13+01:00 │ 2019-07-25T22:16:12+02:00 │
+ │ František Kučera     │                 │ 136            │ 2011-04-02T15:36:52+02:00 │ 2014-09-11T20:30:46+02:00 │
+ ╰──────────────────────┴─────────────────┴────────────────┴───────────────────────────┴───────────────────────────╯
+Record count: 2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/relpipe-data/examples/release-v0.13.sh	Wed Jul 31 16:01:34 2019 +0200
@@ -0,0 +1,46 @@
+# Install dependencies as root:
+su -c "apt install g++ make cmake mercurial pkg-config"
+su -c "apt install libxerces-c-dev" # needed only for relpipe-in-xml module
+su -c "apt install guile-2.2-dev"   # needed only for relpipe-tr-guile module; guile-2.0-dev also works but requires a patch (see below)
+su -c "apt install gawk"            # needed only for relpipe-tr-awk module
+su -c "apt install libxml++2.6-dev" # needed only for relpipe-in-xmltable module
+su -c "apt install libsqlite3-dev"  # needed only for relpipe-tr-sql module
+
+# Run rest of installation as a non-root user:
+export RELPIPE_VERSION="v0.13"
+export RELPIPE_SRC=~/src
+export RELPIPE_BUILD=~/build
+export RELPIPE_INSTALL=~/install
+export PKG_CONFIG_PATH="$RELPIPE_INSTALL/lib/pkgconfig/:$PKG_CONFIG_PATH"
+export PATH="$RELPIPE_INSTALL/bin:$PATH"
+
+rm -rf "$RELPIPE_BUILD"/relpipe-*
+mkdir -p "$RELPIPE_SRC" "$RELPIPE_BUILD" "$RELPIPE_INSTALL"
+
+# Helper functions:
+relpipe_download() { for m in "$@"; do cd "$RELPIPE_SRC" && ([[ -d "relpipe-$m.cpp" ]] && hg pull -R "relpipe-$m.cpp" && hg update -R "relpipe-$m.cpp" "$RELPIPE_VERSION" || hg clone -u "$RELPIPE_VERSION" https://hg.globalcode.info/relpipe/relpipe-$m.cpp) || break; done; }
+relpipe_install()  { for m in "$@"; do cd "$RELPIPE_BUILD" && mkdir -p relpipe-$m.cpp && cd relpipe-$m.cpp && cmake -DCMAKE_INSTALL_PREFIX:PATH="$RELPIPE_INSTALL" "$RELPIPE_SRC/relpipe-$m.cpp" && make && make install || break; done; }
+
+# Download all sources:
+relpipe_download lib-protocol lib-reader lib-writer lib-cli lib-xmlwriter in-cli in-fstab in-xml in-xmltable in-csv in-filesystem in-recfile out-gui.qt out-nullbyte out-ods out-tabular out-xml out-csv out-asn1 out-recfile tr-cut tr-grep tr-python tr-sed tr-validator tr-guile tr-awk tr-sql
+
+# Optional: At this point, we have all dependencies and sources downloaded, so we can disconnect this computer from the internet in order to verify that our build process is sane, deterministic and does not depend on any external resources.
+
+# Build and install libraries:
+relpipe_install lib-protocol lib-reader lib-writer lib-cli lib-xmlwriter
+
+# Build and install tools:
+relpipe_install in-fstab in-cli in-fstab in-xml in-xmltable in-csv in-recfile tr-cut tr-grep tr-sed tr-guile tr-awk tr-sql out-nullbyte out-ods out-tabular out-xml out-csv out-asn1 out-recfile
+
+# relpipe_install in-filesystem # requires GCC 8 or patching (see below)
+
+# Clean-up:
+unset -f relpipe_install
+unset -f relpipe_download
+unset -v RELPIPE_VERSION
+unset -v RELPIPE_SRC
+unset -v RELPIPE_BUILD
+unset -v RELPIPE_INSTALL
+
+# Compute statistics of currently mounted filesystems using SQL and view them like on an 80s green screen terminal!
+cat /etc/mtab | relpipe-in-fstab | relpipe-tr-sql --relation mount_statistics "SELECT type, count(*) AS count FROM fstab GROUP BY type ORDER BY count DESC" | relpipe-out-tabular
--- a/relpipe-data/implementation.xml	Thu Jul 25 22:16:12 2019 +0200
+++ b/relpipe-data/implementation.xml	Wed Jul 31 16:01:34 2019 +0200
@@ -40,6 +40,7 @@
 			relpipe-tr-guile.cpp	executable	transformation	c++	GNU GPLv3+
 			relpipe-tr-python.cpp	executable	transformation	c++	GNU GPLv3+
 			relpipe-tr-sed.cpp	executable	transformation	c++	GNU GPLv3+
+			relpipe-tr-sql.cpp	executable	transformation	c++	GNU GPLv3+
 			relpipe-tr-validator.cpp	executable	transformation	c++	GNU GPLv3+
 		</m:tabulka>
 		<!--
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/relpipe-data/release-v0.13.xml	Wed Jul 31 16:01:34 2019 +0200
@@ -0,0 +1,123 @@
+<stránka
+	xmlns="https://trac.frantovo.cz/xml-web-generator/wiki/xmlns/strana"
+	xmlns:m="https://trac.frantovo.cz/xml-web-generator/wiki/xmlns/makro">
+	
+	<nadpis>Release v0.13</nadpis>
+	<perex>sixth public release of Relational pipes</perex>
+	<m:release>v0.13</m:release>
+
+	<text xmlns="http://www.w3.org/1999/xhtml">
+		<p>
+			We are pleased to introduce you the new development version of <m:name/>.
+			This release brings the XMLTable input filter, the SQL transformation and a small improvement in the filesystem input filter: 
+		</p>
+		
+		<ul>
+			<li>
+				<strong>XMLTable input</strong>:
+				This tool does similar job like the <a href="https://www.postgresql.org/docs/current/functions-xml.html">xmltable</a> function known from SQL.
+				It uses the <a href="https://www.w3.org/TR/xpath/all/">XPath</a> language for selecting parts of the input XML – one XPath expression points to record nodes
+				and one or more XPath expressions point to attribute nodes/values relatively to particular record node.
+				Thus it is able to produce one or more relations from an arbitrary XML input.
+				The input is parsed at once and converted to DOM in memory i.e. no streaming – thus processing of huge XML files requires appropriate amounts of RAM, on the other hand: 
+				our expression can access whole XML document and pick values not only from currently processed record node.
+				This tool uses the <a href="http://xmlsoft.org/">Libxml2</a> library (XML parser and XPath processor).
+			</li>
+			
+			<li>
+				<strong>SQL transformation</strong>:
+				SQL is one of most powerful languages for processing relational data and the most widespread one.
+				Now it can be used even on-the-fly in shell pipelines – without having any database server running.
+				It is useful for record filtering, JOINing several relations together, doing aggregations or computations.
+				By default everything is done in memory, but with the <code>--file</code> parameter we can use a temporary file
+				and with <code>--keep-file</code> we can make it not so temporary.
+				This tool uses the <a href="https://www.sqlite.org/">SQLite</a> library.
+			</li>
+			
+			<li>
+				<strong>file system input</strong>:
+				new optional attribute has been added: <code>--file content</code>
+				which allows getting the text content (currently only in the UTF-8 encoding) of the file which allows us using the file system as a simple database.
+			</li>
+			
+		</ul>
+		
+		<p>
+			See the <m:a href="examples">examples</m:a> and <m:a href="screenshots">screenshots</m:a> pages for details.
+		</p>
+		
+		<p>
+			Please note that this is still a development relasease and thus the API (libraries, CLI arguments, formats) might and will change.
+			Any suggestions, ideas and bug reports are welcome in our <m:a href="contact">mailing list</m:a>.
+		</p>
+		
+		<h3>Data types</h3>
+		<ul>
+			<li m:since="v0.8">boolean</li>
+			<li m:since="v0.8">variable unsigned integer (prototype)</li>
+			<li m:since="v0.8">string in UTF-8</li>
+		</ul>
+		<h3>Inputs</h3>
+		<ul>
+			<li m:since="v0.11">Recfile</li>
+			<li m:since="v0.9">XML</li>
+			<li m:since="v0.13">XMLTable</li>
+			<li m:since="v0.9">CSV</li>
+			<li m:since="v0.9">file system</li>
+			<li m:since="v0.8">CLI</li>
+			<li m:since="v0.8">fstab</li>
+		</ul>
+		<h3>Transformations</h3>
+		<ul>
+			<li m:since="v0.13">sql: filtering and transformations using the SQL language</li>
+			<li m:since="v0.12">awk: filtering and transformations using the classic AWK tool and language</li>
+			<li m:since="v0.10">guile: filtering and transformations defined in the Scheme language using GNU Guile</li>
+			<li m:since="v0.8">grep: regular expression filter, removes unwanted records from the relation</li>
+			<li m:since="v0.8">cut: regular expression attribute cutter (removes or duplicates attributes and can also DROP whole relation)</li>
+			<li m:since="v0.8">sed: regular expression replacer</li>
+			<li m:since="v0.8">validator: just a pass-through filter that crashes on invalid data</li>
+			<li m:since="v0.8">python: highly experimental</li>
+		</ul>
+		<h3>Outputs</h3>
+		<ul>
+			<li m:since="v0.11">ASN.1 BER</li>
+			<li m:since="v0.11">Recfile</li>
+			<li m:since="v0.9">CSV</li>
+			<li m:since="v0.8">tabular</li>
+			<li m:since="v0.8">XML</li>
+			<li m:since="v0.8">nullbyte</li>
+			<li m:since="v0.8">GUI in Qt</li>
+			<li m:since="v0.8">ODS (LibreOffice)</li>
+		</ul>
+		
+		<p>
+			Instalation was tested on Debian GNU/Linux 9.6.
+			The process should be similar on other distributions.
+		</p>
+		
+		<m:pre src="examples/release-v0.13.sh" jazyk="bash" odkaz="ano"/>
+		
+		<p>
+			<m:name/> are modular thus you can download and install only parts you need (the libraries are needed always).
+			Tools <code>out-gui.qt</code> and <code>tr-python</code> require additional libraries and are not built by default.
+		</p>
+		
+		<p>
+			The module <code>relpipe-in-filesystem</code> uses C++ filesystem API which is supported since GCC 8.
+			This module can be compiled and seems usable even with GCC 6, but requires some patching (switch to the experimental API):
+		</p>
+		
+		<m:pre jazyk="bash"><![CDATA[sed 's@#include <filesystem>@#include <experimental/filesystem>@g' -i "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/FileAttributeFinder.h "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/XattrAttributeFinder.h "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/FilesystemCommand.h "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/AttributeFinder.h
+sed 's@std::filesystem@std::experimental::filesystem@g' -i "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/FileAttributeFinder.h "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/XattrAttributeFinder.h "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/FilesystemCommand.h "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/AttributeFinder.h
+sed 's/.*PROPERTY CXX_STANDARD.*/#\0/g' -i "$RELPIPE_SRC"/relpipe-in-filesystem.cpp/src/CMakeLists.txt]]></m:pre>
+
+		<p>
+			The module <code>relpipe-tr-guile</code> uses GNU Guile 2.2 but can also work with 2.0.
+			In such case, it requires this patch:
+		</p>
+		
+		<m:pre jazyk="bash"><![CDATA[sed 's/guile-2\.2/guile-2.0/g' -i "$RELPIPE_SRC"/relpipe-tr-guile.cpp/src/CMakeLists.txt]]></m:pre>
+
+	</text>
+
+</stránka>
--- a/relpipe-data/roadmap.xml	Thu Jul 25 22:16:12 2019 +0200
+++ b/relpipe-data/roadmap.xml	Wed Jul 31 16:01:34 2019 +0200
@@ -16,7 +16,7 @@
 			Released versions are described on the <m:a href="download">download</m:a> page.
 		</p>
 		
-		<h2>v0.13, v0.14, v0.15 etc.</h2>
+		<h2>v0.14, v0.15, v0.16 etc.</h2>
 		
 		<p>
 			Releases for discussion and verification of the format and API design.
@@ -47,9 +47,8 @@
 		<p>Probably no new ones before v1.0.0.</p>
 		
 		<h3>Transformations</h3>
-		<ul>
-			<li>SQL: sqlite</li>
-		</ul>
+		<p>Probably no new ones before v1.0.0.</p>
+		
 		<h3>Outputs</h3>
 		<p>Probably no new ones before v1.0.0.</p>