# HG changeset patch # User František Kučera # Date 1549553061 -3600 # Node ID e76ca9f7d6cb983b57fbb2773fee660579ceb942 # Parent 087b8621fb3e34cb1edcd70a0908b39c44e92360 examples: Aggregating data with Guile diff -r 087b8621fb3e -r e76ca9f7d6cb relpipe-data/examples-guile-aggregations.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/relpipe-data/examples-guile-aggregations.xml Thu Feb 07 16:24:21 2019 +0100 @@ -0,0 +1,62 @@ + + + Aggregating data with Guile + counting records and computing sum + 01700 + + + +

+ In relpipe-tr-guile we can generate new records – not only modify records from the input. + There is a --has-more-records option which – if it evaluates to true – says: „read one more record from the Guile context and call me again“. + We can also suppress all original records with --where '#f'. + And we can also change the structure of the relation (see previous examples). + Thus we can iterate through a relation but completely replace its structure and content. +

+ +

+ What is it good for? We can do aggregations – we can count records, compute the sum, maximum, minimum or average value, etc. +

+ + + +

Usage example:

+ + + +

+ In SQL, the same result can be achieved by: +

+ + + +

+ This should be possible with relpipe-tr-sql in later versions. + SQL is much more declarative and in many cases a better tool. + In SQL we describe „what the result should look like“ instead of „how the result should be produced step by step“. +

+ +

+ One day, there might also be a translator that parses SQL code and generates Guile code, + so we could have the advantages of both worlds: + a) the concise and declarative syntax of SQL and + b) streaming – which means no need for putting all the data in RAM or on disk. +

+ + + +
+ +
diff -r 087b8621fb3e -r e76ca9f7d6cb relpipe-data/examples/guile-file-count-size-sum.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/relpipe-data/examples/guile-file-count-size-sum.sh	Thu Feb 07 16:24:21 2019 +0100
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Usage: guile-file-count-size-sum.sh DIRECTORY
+#
+# argument: directory path
+# prints file count and sum of file sizes
+#
+# The Guile step adds up the record count and the size sum over all input
+# records, suppresses every original record via the where condition and, after
+# the last record, lets has-more-records answer true exactly once, so that one
+# record with the totals is read from the Guile context and emitted.
+
+find "$1" -type f -print0 \
+	| relpipe-in-filesystem \
+		--file path \
+		--file size \
+	| relpipe-tr-guile \
+		--relation 'f.*' \
+		--output-attribute 'count' integer \
+		--output-attribute 'sum' integer \
+		--before-records '
+			(define $sum 0)
+			(define $count 0)
+			(define return-sum #f)' \
+		--for-each '
+			(set! $sum (+ $sum $size) )
+			(set! $count (+ $count 1 ) )' \
+		--where '#f' \
+		--after-records '(set! return-sum #t)' \
+		--has-more-records '
+			(if return-sum
+				(begin (set! return-sum #f) #t)
+				#f
+			)' \
+	| relpipe-out-tabular