src/StreamletAttributeFinder.h
author František Kučera <franta-hg@frantovo.cz>
Wed, 29 Jan 2020 00:58:37 +0100
branchv_0
changeset 70 018e2609f5bb
parent 62 a467e8cbd16b
child 96 c34106244a54
permissions -rw-r--r--
streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
4
d44ed75822e7 modular design
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
     3
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
0
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
24
4353cd19a6b5 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
0
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#pragma once
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <vector>
2
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    20
#include <filesystem>
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    21
#include <regex>
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    22
#include <memory>
46
b5ae61996281 streamlets: fix command name collisions, do not prepend $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH to the $PATH
František Kučera <franta-hg@frantovo.cz>
parents: 45
diff changeset
    23
#include <sstream>
47
beefddde951e streamlets: find only executable files
František Kučera <franta-hg@frantovo.cz>
parents: 46
diff changeset
    24
#include <unistd.h>
2
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    25
4
d44ed75822e7 modular design
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    26
#include <relpipe/writer/typedefs.h>
d44ed75822e7 modular design
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    27
#include <relpipe/writer/AttributeMetadata.h>
d44ed75822e7 modular design
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    28
#include <relpipe/writer/RelationalWriter.h>
2
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    29
4
d44ed75822e7 modular design
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    30
#include "RequestedField.h"
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    31
#include "SubProcess.h"
27
532953173cd5 file hash: md5, sha1, sha256, sha512
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
    32
#include "AttributeFinder.h"
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    33
#include "StreamletMsg.h"
0
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
namespace relpipe {
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
namespace in {
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
namespace filesystem {
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
2
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    39
namespace fs = std::filesystem;
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    40
using namespace relpipe::writer;
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    41
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    42
class StreamletAttributeFinder : public AttributeFinder {
4
d44ed75822e7 modular design
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    43
private:
2
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    44
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    45
	std::map<int, std::shared_ptr<SubProcess>> subProcesses;
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    46
	std::map<int, std::vector<AttributeMetadata>> cachedMetadata;
28
9172bd97ae99 custom scripts for additional attributes
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
    47
48
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    48
	std::vector<string_t> splitBySeparator(const string_t& originalString, const wchar_t separator = L':', const wchar_t escape = L'\\') {
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    49
		std::vector<string_t> result;
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    50
		std::wstringstream current;
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    51
		for (int i = 0, size = originalString.size(); i < size; i++) {
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    52
			wchar_t ch = originalString[i];
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    53
			if (ch == escape) {
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    54
				if (i + 1 < size) {
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    55
					ch = originalString[++i];
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    56
					if (ch == separator || ch == escape) current.put(ch);
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    57
					else RelpipeWriterException(L"Invalid escape sequence at position " + std::to_wstring(i) + L" of: " + originalString);
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    58
				} else {
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    59
					throw RelpipeWriterException(L"Invalid use of escape character at the end of: " + originalString);
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    60
				}
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    61
			} else if (ch == separator || i + 1 == size) {
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    62
				if (current.str().size()) result.push_back(current.str());
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    63
				current.str(L"");
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    64
				current.clear();
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    65
			} else {
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    66
				current.put(ch);
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    67
			}
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    68
		}
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    69
		return result;
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    70
	}
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
    71
46
b5ae61996281 streamlets: fix command name collisions, do not prepend $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH to the $PATH
František Kučera <franta-hg@frantovo.cz>
parents: 45
diff changeset
    72
	/**
	 * Finds the streamlet program that belongs to the given field.
	 *
	 * The directories listed in the $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH environment variable
	 * are searched in the given order; the first existing file that is named like the field
	 * and passes the access(X_OK) check wins.
	 *
	 * @param field requested field; its name is used as the file name of the streamlet
	 * @return path of the streamlet file that was found
	 * @throws RelpipeWriterException if the environment variable is not set
	 *         or if no suitable file was found in any of the listed directories
	 */
	string_t getStreamletCommand(const RequestedField& field) {
		const char* searchPath = getenv("RELPIPE_IN_FILESYSTEM_STREAMLET_PATH");
		if (searchPath == nullptr) {
			throw RelpipeWriterException(L"Missing environment variable RELPIPE_IN_FILESYSTEM_STREAMLET_PATH → unable to find streamlet.");
		}

		const std::vector<string_t> directories = splitBySeparator(convertor.from_bytes(searchPath));
		for (const string_t& directory : directories) {
			const fs::path candidate = fs::path(directory) / fs::path(field.name);
			// Original author's note: the file must be set executable using e.g. chmod;
			// files that are executable only through an ACL are not accepted.
			const bool usable = fs::exists(candidate) && ::access(candidate.c_str(), X_OK) == 0;
			if (usable) return candidate.wstring();
		}

		throw RelpipeWriterException(L"Streamlet „" + field.name + L"“ was not found at $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH");
	}
45
f466b4c7d9b1 streamlets: use $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH variable instead of __relpipe_in_filesystem_script_ prefix
František Kučera <franta-hg@frantovo.cz>
parents: 35
diff changeset
    84
70
018e2609f5bb streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
František Kučera <franta-hg@frantovo.cz>
parents: 62
diff changeset
    85
	/**
	 * Writes one attribute value received from a streamlet to the relational output.
	 *
	 * The message carries the value in parameters[0]. If parameters[1] equals "true",
	 * the value is treated as NULL and a type-specific placeholder is written instead.
	 *
	 * @param writer output where the attribute is written
	 * @param typeId declared type of the attribute; selects the placeholder for NULL
	 * @param m message from the streamlet; both parameters[0] and parameters[1] are read
	 */
	void writeAttribute(RelationalWriter* writer, TypeId typeId, SubProcess::Message* m) {
		if (!(m->parameters[1] == L"true")) {
			// Regular (non-NULL) value: pass it through as it is.
			writer->writeAttribute(m->parameters[0]);
			return;
		}

		// TODO: write actual null values (when supported)
		if (typeId == TypeId::BOOLEAN) writer->writeAttribute(L"false");
		else if (typeId == TypeId::INTEGER) writer->writeAttribute(L"0");
		else writer->writeAttribute(L"");
	}
018e2609f5bb streamlets: move NULL handling from particular streamlets to StreamletAttributeFinder
František Kučera <franta-hg@frantovo.cz>
parents: 62
diff changeset
    94
9
b4f29fb16159 process also links to non-existent files and non-readable links
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    95
protected:
b4f29fb16159 process also links to non-existent files and non-readable links
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    96
35
926eb93c302f streamlets: enable parallel processing
František Kučera <franta-hg@frantovo.cz>
parents: 32
diff changeset
    97
	/**
	 * Called at the beginning of each file: delegates to AttributeFinder::startFile()
	 * and then, if the file exists, sends the file name to every running sub-process
	 * followed by a request for its output attributes.
	 *
	 * @param file path of the file, passed to the parent implementation
	 * @param fileRaw raw file name, passed to the parent implementation
	 * @param exists whether the file exists; nothing is sent to the sub-processes otherwise
	 */
	void startFile(const fs::path& file, const string& fileRaw, bool exists) override {
		AttributeFinder::startFile(file, fileRaw, exists);
		if (!exists) return;

		for (const auto& entry : subProcesses) {
			const auto& streamlet = entry.second;
			// parameters of the input attribute: index, value, isNull
			streamlet->write({StreamletMsg::INPUT_ATTRIBUTE, L"0", convertor.from_bytes(currentFileRaw), L"false"});
			streamlet->write({StreamletMsg::WAITING_FOR_OUTPUT_ATTRIBUTES});
		}
	}
926eb93c302f streamlets: enable parallel processing
František Kučera <franta-hg@frantovo.cz>
parents: 32
diff changeset
   106
926eb93c302f streamlets: enable parallel processing
František Kučera <franta-hg@frantovo.cz>
parents: 32
diff changeset
   107
	/**
	 * Reads the output attributes of one streamlet field from its sub-process and writes them out.
	 *
	 * One OUTPUT_ATTRIBUTE message is expected for each attribute in the cached metadata
	 * of the field, followed by a single WAITING_FOR_INPUT_ATTRIBUTES message.
	 * Fields of other groups than GROUP_STREAMLET are ignored.
	 *
	 * @param writer output where the attribute values are written
	 * @param relationName not used by this method
	 * @param field requested field; its id selects the sub-process and the cached metadata
	 * @throws RelpipeWriterException if the sub-process sends a different message than expected
	 */
	virtual void writeFieldOfExistingFile(RelationalWriter* writer, const string_t& relationName, const RequestedField & field) override {
		if (field.group != RequestedField::GROUP_STREAMLET) return;

		const auto& expectedAttributes = cachedMetadata[field.id];
		const auto streamlet = subProcesses[field.id];

		for (const auto& attribute : expectedAttributes) {
			SubProcess::Message reply = streamlet->read();
			if (reply.code != StreamletMsg::OUTPUT_ATTRIBUTE) {
				throw RelpipeWriterException(L"Protocol violation from exec sub-process while reading: „" + attribute.attributeName + L"“. Expected OUTPUT_ATTRIBUTE but got: " + reply.toString());
			}
			writeAttribute(writer, attribute.typeId, &reply);
		}

		// After the last attribute, the sub-process has to announce that it waits for the next record.
		SubProcess::Message closing = streamlet->read();
		if (closing.code != StreamletMsg::WAITING_FOR_INPUT_ATTRIBUTES) {
			throw RelpipeWriterException(L"Protocol violation from exec sub-process. Expected WAITING_FOR_INPUT_ATTRIBUTES but got: " + closing.toString());
		}
	}
b4f29fb16159 process also links to non-existent files and non-readable links
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
   119
0
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   120
public:
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   121
48
26a8b1a14889 streamlets: separate parsing from searching
František Kučera <franta-hg@frantovo.cz>
parents: 47
diff changeset
   122
	virtual vector<AttributeMetadata> toMetadata(RelationalWriter* writer, const string_t& relationName, const RequestedField & field) override {
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   123
		if (field.group == RequestedField::GROUP_STREAMLET) {
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   124
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   125
			if (cachedMetadata.count(field.id)) {
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   126
				return cachedMetadata[field.id];
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   127
			} else {
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   128
46
b5ae61996281 streamlets: fix command name collisions, do not prepend $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH to the $PATH
František Kučera <franta-hg@frantovo.cz>
parents: 45
diff changeset
   129
				std::vector<string_t> commandLine = {getStreamletCommand(field)};
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   130
				std::map<string_t, string_t> environment;
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   131
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   132
				for (auto mn : StreamletMsg::getMessageNames()) {
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   133
					environment[L"EXEC_MSG_" + mn.second] = std::to_wstring(mn.first);
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   134
					environment[L"EXEC_MSG_" + std::to_wstring(mn.first)] = mn.second;
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   135
				}
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   136
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   137
				shared_ptr<SubProcess> subProcess(SubProcess::create(commandLine, environment));
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   138
				subProcesses[field.id] = subProcess;
27
532953173cd5 file hash: md5, sha1, sha256, sha512
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   139
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   140
				string_t version = L"1";
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   141
				subProcess->write({StreamletMsg::VERSION_SUPPORTED, version});
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   142
				subProcess->write({StreamletMsg::WAITING_FOR_VERSION});
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   143
				SubProcess::Message versionMessage = subProcess->read();
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   144
				if (versionMessage.code == StreamletMsg::VERSION_ACCEPTED && versionMessage.parameters[0] == version) {
32
bccda5688d71 propagate the relation name to the finders and streamlets
František Kučera <franta-hg@frantovo.cz>
parents: 31
diff changeset
   145
					subProcess->write({StreamletMsg::RELATION_START, relationName});
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   146
					subProcess->write({StreamletMsg::INPUT_ATTRIBUTE_METADATA, L"path", L"string"});
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   147
					for (string_t alias : field.getAliases()) subProcess->write({StreamletMsg::OUTPUT_ATTRIBUTE_ALIAS, alias});
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   148
					for (int i = 0; i < field.options.size();) subProcess->write({StreamletMsg::OPTION, field.options[i++], field.options[i++]});
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   149
					subProcess->write({StreamletMsg::WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA});
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   150
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   151
					vector<AttributeMetadata> metadata;
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   152
					while (true) {
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   153
						SubProcess::Message m = subProcess->read();
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   154
						if (m.code == StreamletMsg::OUTPUT_ATTRIBUTE_METADATA) metadata.push_back({m.parameters[0], writer->toTypeId(m.parameters[1])});
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   155
						else if (m.code == StreamletMsg::WAITING_FOR_INPUT_ATTRIBUTES) break;
62
a467e8cbd16b parallel processing: optimize flush() + detect another protocol violation
František Kučera <franta-hg@frantovo.cz>
parents: 61
diff changeset
   156
						else throw RelpipeWriterException(L"Protocol violation from exec sub-process while reading output attribute metadata. Expected OUTPUT_ATTRIBUTE_METADATA or WAITING_FOR_INPUT_ATTRIBUTES but got: " + m.toString());
29
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   157
					}
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   158
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   159
					cachedMetadata[field.id] = metadata;
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   160
					return metadata;
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   161
				} else {
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   162
					throw RelpipeWriterException(L"Incompatible exec sub-process version or message: " + versionMessage.toString());
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   163
				}
6f15f18d2abf field group --exec, replaces --script and --hash, starts reusable sub-program that returns set of attributes for all records during its runtime
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   164
			}
5
ec661baf433a support field aliases
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   165
		} else {
ec661baf433a support field aliases
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   166
			return {};
ec661baf433a support field aliases
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   167
		}
4
d44ed75822e7 modular design
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
   168
	}
2
f07ed604a0ab read filenames separated by a null-byte; fetch their size, owner, group… and some xattr (extended attribute); a preview version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   169
31
c64e1588f428 rename --exec to --streamlet
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   170
	/**
	 * Shuts down all sub-processes registered in subProcesses.
	 *
	 * Each sub-process is sent a StreamletMsg::RELATION_END message and then
	 * waited for. Any exception raised by write() or wait() is caught and
	 * reported on std::wcerr together with the sub-process key, so a failure
	 * of one sub-process does not prevent the remaining ones from being closed.
	 */
	virtual ~StreamletAttributeFinder() override {
		// Iterate by const reference: copying each entry (key + sub-process handle) per iteration is unnecessary.
		for (const auto& s : subProcesses) {
			try {
				s.second->write({StreamletMsg::RELATION_END});
				s.second->wait();
			} catch (...) {
				// Best-effort cleanup: only report the problem and continue with the next sub-process.
				std::wcerr << L"Exception caught during closing sub-process #" + std::to_wstring(s.first) + L" and waiting for its end." << std::endl;
			}
		}
	}
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   180
};
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   181
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   182
}
467d09b62a12 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   183
}
27
532953173cd5 file hash: md5, sha1, sha256, sha512
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   184
}