relpipe-data/examples/parallel-hashes-1.sh
author František Kučera <franta-hg@frantovo.cz>
Mon, 03 Feb 2020 22:10:07 +0100
branch v_0
changeset 294 abbc9bcfbcc4
permissions -rwxr-xr-x
Release v0.15 – streamlets, parallel processing
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
294
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
#!/bin/bash
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
# Emit the paths of all entries to be processed, NUL-terminated, on stdout.
#
# Arguments: $@ - optional starting points for the search; when none are
#                 given, /bin/ is searched.
# Outputs:   every path found, each terminated by a NUL byte (find -print0),
#            so names containing whitespace or newlines pass through intact.
# Returns:   the exit status of find.
findFiles() {
	# No starting point supplied: fall back to the default directory.
	if (( $# == 0 )); then
		set -- /bin/
	fi
	find "$@" -print0
}
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
# Run relpipe-in-filesystem requesting the path, type and size file
# attributes plus the "hash" streamlet.
#
# Arguments: $1 - optional value for --parallel; defaults to 4.
# Inputs:    stdin is passed through to relpipe-in-filesystem; in this
#            script it is the NUL-terminated path list from findFiles.
# Outputs:   whatever relpipe-in-filesystem writes to stdout.
# Returns:   the exit status of relpipe-in-filesystem.
# NOTE(review): the hash streamlet presumably supplies the sha256 attribute
# queried later in the pipeline - confirm against the relpipe documentation.
fetchAttributes() {
	local workers="${1:-4}"
	relpipe-in-filesystem \
		--parallel "$workers" \
		--file path \
		--file type \
		--file size \
		--streamlet hash
}
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
# Run an SQL query through relpipe-tr-sql, naming the result relation
# "file_hashes".
#
# The query reads the "filesystem" relation and returns path, type, size and
# sha256 for each row, plus same_hash_count: the number of rows that share
# the row's sha256 value. Rows are ordered by same_hash_count, then sha256,
# path and type.
#
# Inputs:  stdin is passed through to relpipe-tr-sql.
# Outputs: whatever relpipe-tr-sql writes to stdout.
# Returns: the exit status of relpipe-tr-sql.
aggregate() {
	# The statement text is kept verbatim, including its embedded whitespace.
	local query="SELECT
			path,
			type,
			size,
			sha256,
			count(*) OVER (PARTITION BY sha256) AS same_hash_count
		FROM filesystem
		ORDER BY same_hash_count, sha256, path, type"
	relpipe-tr-sql --relation "file_hashes" "$query"
}
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
abbc9bcfbcc4 Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
# Main pipeline: list files, fetch their attributes, run the SQL aggregation
# and render the result with relpipe-out-tabular.
#
# pipefail makes the pipeline's exit status non-zero when any stage fails;
# without it only the status of the last stage would be reported.
set -o pipefail
findFiles \
	| fetchAttributes \
	| aggregate \
	| relpipe-out-tabular