author | František Kučera <franta-hg@frantovo.cz> |
Fri, 24 Jan 2020 21:05:10 +0100 | |
branch | v_0 |
changeset 59 | 7471529c0d11 |
parent 58 | 4679f67a8324 |
child 61 | 640ba8948d69 |
permissions | -rw-r--r-- |
0 | 1 |
/** |
2 |
* Relational pipes |
|
4 | 3 |
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) |
0 | 4 |
* |
5 |
* This program is free software: you can redistribute it and/or modify |
|
6 |
* it under the terms of the GNU General Public License as published by |
|
24
4353cd19a6b5
fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
7 |
* the Free Software Foundation, version 3 of the License. |
0 | 8 |
* |
9 |
* This program is distributed in the hope that it will be useful, |
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
* GNU General Public License for more details. |
|
13 |
* |
|
14 |
* You should have received a copy of the GNU General Public License |
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 |
*/ |
|
17 |
#pragma once |
|
18 |
||
19 |
#include <cstdlib> |
|
20 |
#include <iostream> |
|
4 | 21 |
#include <sstream> |
0 | 22 |
#include <string> |
23 |
#include <vector> |
|
4 | 24 |
#include <map> |
52
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
25 |
#include <memory> |
0 | 26 |
#include <algorithm> |
2
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
27 |
#include <filesystem> |
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
28 |
|
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
29 |
#include <pwd.h> |
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
30 |
#include <grp.h> |
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
31 |
#include <sys/stat.h> |
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
32 |
|
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
33 |
#include <sys/xattr.h> |
0 | 34 |
|
35 |
#include <relpipe/writer/typedefs.h> |
|
36 |
||
4 | 37 |
#include "Configuration.h" |
38 |
#include "AttributeFinder.h" |
|
39 |
#include "FileAttributeFinder.h" |
|
40 |
#include "XattrAttributeFinder.h" |
|
31
c64e1588f428
rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents:
29
diff
changeset
|
41 |
#include "StreamletAttributeFinder.h" |
4 | 42 |
|
0 | 43 |
namespace relpipe { |
44 |
namespace in { |
|
45 |
namespace filesystem { |
|
46 |
||
2
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
47 |
namespace fs = std::filesystem; |
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
48 |
using namespace relpipe::writer; |
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
49 |
|
54
ef726975c34b
parallel processing: rename FilesystemCommandBase to FilesystemCommand
František Kučera <franta-hg@frantovo.cz>
parents:
52
diff
changeset
|
50 |
class FilesystemCommand { |
52
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
51 |
protected: |
58
4679f67a8324
parallel processing: put some common code in FilesystemCommand + use POSIX semaphores for STDOUT synchronization across sub-processes
František Kučera <franta-hg@frantovo.cz>
parents:
57
diff
changeset
|
52 |
std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings. |
2
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
53 |
|
52
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
54 |
std::map<string_t, std::shared_ptr<AttributeFinder>> createAttributeFinders() { |
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
55 |
return { |
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
56 |
{RequestedField::GROUP_FILE, std::make_shared<FileAttributeFinder>()}, |
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
57 |
{RequestedField::GROUP_STREAMLET, std::make_shared<StreamletAttributeFinder>()}, |
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
58 |
{RequestedField::GROUP_XATTR, std::make_shared<XattrAttributeFinder>()}}; |
fea625f0a096
parallel processing: prepare infrastructure
František Kučera <franta-hg@frantovo.cz>
parents:
32
diff
changeset
|
59 |
} |
4 | 60 |
|
2
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
61 |
/**
 * Prepares the given stream for reuse: replaces its buffered content with an empty string
 * and clears its state flags.
 *
 * @param stream the stream to be emptied
 */
void reset(std::stringstream& stream) {
	stream.str(std::string());
	stream.clear();
}
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
65 |
|
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
66 |
/**
 * Reads one null-byte terminated record from the input and appends its bytes (without the terminator)
 * to originalName.
 *
 * @param input stream to read from; consumed up to and including the next null byte or up to its end
 * @param originalName receives the bytes of the record; it is not emptied here, so the caller is expected
 *        to pass an empty stream
 * @return true if a terminating null byte was found (even for an empty record) or if the input ended
 *         after at least one byte was collected; false if the input ended and nothing was collected
 */
bool readNext(std::istream& input, std::stringstream& originalName) {
	char ch;
	while (input.get(ch)) {
		if (ch == '\0') return true;
		originalName.put(ch); // raw byte, no formatting needed
	}
	// End of input without a terminator: there is a record only if something was written.
	// tellp() yields -1 for a stream in a failed state; the explicit comparison keeps that from being
	// mistaken for "data present" (an implicit conversion to bool would turn -1 into true).
	return static_cast<std::streamoff>(originalName.tellp()) > 0;
}
f07ed604a0ab
read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
73 |
|
58
4679f67a8324
parallel processing: put some common code in FilesystemCommand + use POSIX semaphores for STDOUT synchronization across sub-processes
František Kučera <franta-hg@frantovo.cz>
parents:
57
diff
changeset
|
74 |
/**
 * Determines the name of the relation to be written.
 *
 * @param configuration configuration whose relation name is read; it is dereferenced unconditionally
 * @return the configured relation name, or L"filesystem" if the configured name is empty
 */
string_t fetchRelationName(Configuration* configuration) {
	if (configuration->relation.empty()) return L"filesystem";
	return configuration->relation;
}
4679f67a8324
parallel processing: put some common code in FilesystemCommand + use POSIX semaphores for STDOUT synchronization across sub-processes
František Kučera <franta-hg@frantovo.cz>
parents:
57
diff
changeset
|
77 |
|
59
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
78 |
/**
 * Collects the attribute metadata of all requested fields and starts the relation on the writer.
 *
 * @param writer handed over to the finders and used to start the relation
 * @param attributeFinders finders keyed by field group
 * @param relationName name of the relation being started
 * @param fields requested fields; dereferenced unconditionally
 * @param writeHeader forwarded as the third argument of RelationalWriter::startRelation()
 * @throws RelpipeWriterException if a requested field belongs to a group that has no finder
 */
void writeHeader(RelationalWriter* writer, std::map<string_t, std::shared_ptr < AttributeFinder>> attributeFinders, string_t relationName, std::vector<RequestedField>* fields, bool writeHeader = true) {
	std::vector<AttributeMetadata> collected;

	for (RequestedField requested : *fields) {
		auto entry = attributeFinders.find(requested.group);
		// a missing key and a null finder are both treated as an unsupported group
		if (entry == attributeFinders.end() || !entry->second) throw RelpipeWriterException(L"Unsupported field group: " + requested.group);

		// a single requested field may contribute several attributes
		for (AttributeMetadata metadata : entry->second->toMetadata(writer, relationName, requested)) collected.push_back(metadata);
	}

	writer->startRelation(relationName, collected, writeHeader);
}
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
88 |
|
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
89 |
void processSingleFile(std::shared_ptr<RelationalWriter> writer, std::stringstream& originalName, std::map<string_t, std::shared_ptr < AttributeFinder>>&attributeFinders, Configuration& configuration, string_t relationName) { |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
90 |
fs::path file(originalName.str().empty() ? "." : originalName.str()); // interpret empty string as current directory (e.g. result of: find -printf '%P\0') |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
91 |
bool exists = false; |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
92 |
|
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
93 |
try { |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
94 |
exists = fs::exists(file); |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
95 |
} catch (const fs::filesystem_error& e) { |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
96 |
// we probably do not have permissions to given directory → pretend that the file does not exist |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
97 |
} |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
98 |
|
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
99 |
for (auto& finder : attributeFinders) finder.second->startFile(file, originalName.str(), exists); |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
100 |
|
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
101 |
for (RequestedField field : configuration.fields) { |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
102 |
std::shared_ptr<AttributeFinder> finder = attributeFinders[field.group]; // should not be nullptr, because already checked while writing the relation metadata |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
103 |
finder->writeField(writer.get(), relationName, field); |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
104 |
} |
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
105 |
|
7471529c0d11
parallel processing: first working version
František Kučera <franta-hg@frantovo.cz>
parents:
58
diff
changeset
|
106 |
for (auto& finder : attributeFinders) finder.second->endFile(); |
58
4679f67a8324
parallel processing: put some common code in FilesystemCommand + use POSIX semaphores for STDOUT synchronization across sub-processes
František Kučera <franta-hg@frantovo.cz>
parents:
57
diff
changeset
|
107 |
} |
4679f67a8324
parallel processing: put some common code in FilesystemCommand + use POSIX semaphores for STDOUT synchronization across sub-processes
František Kučera <franta-hg@frantovo.cz>
parents:
57
diff
changeset
|
108 |
|
0 | 109 |
public: |
110 |
||
54
ef726975c34b
parallel processing: rename FilesystemCommandBase to FilesystemCommand
František Kučera <franta-hg@frantovo.cz>
parents:
52
diff
changeset
|
111 |
virtual ~FilesystemCommand() = default; |
4 | 112 |
|
57
c40a241d6e0c
parallel processing: use directly file descriptors (FD) instead of STDIO streams
František Kučera <franta-hg@frantovo.cz>
parents:
54
diff
changeset
|
113 |
virtual void process(int inputFD, int outputFD, Configuration& configuration) = 0; |
0 | 114 |
}; |
115 |
||
116 |
} |
|
117 |
} |
|
118 |
} |