author | František Kučera <franta-hg@frantovo.cz> |
Mon, 03 Feb 2020 22:10:07 +0100 | |
branch | v_0 |
changeset 294 | abbc9bcfbcc4 |
permissions | -rwxr-xr-x |
294
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
#!/bin/bash |
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
|
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
# findFiles [DIR]
#
# Lists DIR itself and everything found beneath it on standard output.
# Each path is terminated by a NUL byte (find -print0) instead of a newline,
# so file names containing spaces or line breaks remain unambiguous.
#
# Arguments:
#   DIR  optional search root; when missing or empty, /bin/ is used
#        (the location this script originally had hard-coded).
# Exit status: that of find.
findFiles() {
	local root="${1:-/bin/}"
	find "$root" -print0
}
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
|
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
7 |
# fetchAttributes
#
# Runs relpipe-in-filesystem on this function's standard input and writes
# its result to standard output. In this script the input is the
# NUL-delimited path list coming from findFiles.
#
# Requested per-file attributes: path, type and size, plus the "hash"
# streamlet (presumably the source of the sha256 attribute that the
# aggregate step queries - confirm against the relpipe documentation).
# "--parallel 4" is passed as-is; judging by the option name it asks for
# four parallel workers.
fetchAttributes() {
	local -a options=(
		--parallel 4
		--file path
		--file type
		--file size
		--streamlet hash
	)
	relpipe-in-filesystem "${options[@]}"
}
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
|
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
# aggregate
#
# Passes the relational stream on standard input through relpipe-tr-sql and
# defines one relation named "file_hashes" by an SQL query over the
# "filesystem" table.
#
# The query keeps path, type, size and sha256 and adds same_hash_count:
# a window count of the rows sharing the same sha256 value. Rows are ordered
# by same_hash_count first (ascending, so the most duplicated hashes come
# last), then by sha256, path and type.
aggregate() {
	local query="SELECT
			path,
			type,
			size,
			sha256,
			count(*) OVER (PARTITION BY sha256) AS same_hash_count
		FROM filesystem
		ORDER BY same_hash_count, sha256, path, type"
	relpipe-tr-sql --relation "file_hashes" "$query"
}
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
28 |
|
abbc9bcfbcc4
Release v0.15 – streamlets, parallel processing
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
29 |
# Main pipeline, one stage per line:
#   findFiles           - emit the list of paths to inspect
#   fetchAttributes     - turn the paths into a relation of file attributes
#   aggregate           - compute same_hash_count per sha256 via SQL
#   relpipe-out-tabular - final output stage of the pipeline
findFiles \
	| fetchAttributes \
	| aggregate \
	| relpipe-out-tabular