/**
* Relational pipes
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <vector>
#include <filesystem>
#include <regex>
#include <memory>
#include <sstream>
#include <unistd.h>
#include <relpipe/writer/typedefs.h>
#include <relpipe/writer/AttributeMetadata.h>
#include <relpipe/writer/RelationalWriter.h>
#include "RequestedField.h"
#include "SubProcess.h"
#include "AttributeFinder.h"
#include "StreamletMsg.h"
namespace relpipe {
namespace in {
namespace filesystem {
namespace fs = std::filesystem;
using namespace relpipe::writer;
class StreamletAttributeFinder : public AttributeFinder {
private:
std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
std::map<int, std::shared_ptr<SubProcess>> subProcesses;
std::map<int, std::vector<AttributeMetadata>> cachedMetadata;
std::vector<string_t> splitBySeparator(const string_t& originalString, const wchar_t separator = L':', const wchar_t escape = L'\\') {
std::vector<string_t> result;
std::wstringstream current;
for (int i = 0, size = originalString.size(); i < size; i++) {
wchar_t ch = originalString[i];
if (ch == escape) {
if (i + 1 < size) {
ch = originalString[++i];
if (ch == separator || ch == escape) current.put(ch);
else RelpipeWriterException(L"Invalid escape sequence at position " + std::to_wstring(i) + L" of: " + originalString);
} else {
throw RelpipeWriterException(L"Invalid use of escape character at the end of: " + originalString);
}
} else if (ch == separator || i + 1 == size) {
if (current.str().size()) result.push_back(current.str());
current.str(L"");
current.clear();
} else {
current.put(ch);
}
}
return result;
}
string_t getStreamletCommand(const RequestedField& field) {
const char* streamletPathChars = getenv("RELPIPE_IN_FILESYSTEM_STREAMLET_PATH");
if (streamletPathChars) {
for (string_t path : splitBySeparator(convertor.from_bytes(streamletPathChars))) {
fs::path file = fs::path(path) / fs::path(field.name);
if (fs::exists(file) && ::access(file.c_str(), X_OK) == 0) return file.wstring(); // n.b. must be set executable using e.g. chmod – files executable through only ACL, are actually not executable
}
throw RelpipeWriterException(L"Streamlet „" + field.name + L"“ was not found at $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH");
} else {
throw RelpipeWriterException(L"Missing environment variable RELPIPE_IN_FILESYSTEM_STREAMLET_PATH → unable to find streamlet.");
}
}
protected:
void startFile(const fs::path& file, const string& fileRaw, bool exists) override {
AttributeFinder::startFile(file, fileRaw, exists);
if (exists) {
for (auto subProcess : subProcesses) {
subProcess.second->write({StreamletMsg::INPUT_ATTRIBUTE, L"0", convertor.from_bytes(currentFileRaw), L"false"}); // index, value, isNull
subProcess.second->write({StreamletMsg::WAITING_FOR_OUTPUT_ATTRIBUTES});
}
}
}
virtual void writeFieldOfExistingFile(RelationalWriter* writer, const string_t& relationName, const RequestedField & field) override {
// TODO: paralelize also over records → fork multiple processes and distribute records across them; then collect results (with a lock)
if (field.group == RequestedField::GROUP_STREAMLET) {
for (auto metadata : cachedMetadata[field.id]) {
SubProcess::Message m = subProcesses[field.id]->read();
if (m.code == StreamletMsg::OUTPUT_ATTRIBUTE) writer->writeAttribute(m.parameters[0]);
else throw RelpipeWriterException(L"Protocol violation from exec sub-process while reading: „" + metadata.attributeName + L"“. Expected OUTPUT_ATTRIBUTE but got: " + m.toString());
}
SubProcess::Message m = subProcesses[field.id]->read();
if (m.code != StreamletMsg::WAITING_FOR_INPUT_ATTRIBUTES) throw RelpipeWriterException(L"Protocol violation from exec sub-process. Expected WAITING_FOR_INPUT_ATTRIBUTES but got: " + m.toString());
// TODO: generic protocol violation error messages / method for checking responses
}
}
public:
virtual vector<AttributeMetadata> toMetadata(RelationalWriter* writer, const string_t& relationName, const RequestedField & field) override {
if (field.group == RequestedField::GROUP_STREAMLET) {
if (cachedMetadata.count(field.id)) {
return cachedMetadata[field.id];
} else {
std::vector<string_t> commandLine = {getStreamletCommand(field)};
std::map<string_t, string_t> environment;
for (auto mn : StreamletMsg::getMessageNames()) {
environment[L"EXEC_MSG_" + mn.second] = std::to_wstring(mn.first);
environment[L"EXEC_MSG_" + std::to_wstring(mn.first)] = mn.second;
}
shared_ptr<SubProcess> subProcess(SubProcess::create(commandLine, environment));
subProcesses[field.id] = subProcess;
string_t version = L"1";
subProcess->write({StreamletMsg::VERSION_SUPPORTED, version});
subProcess->write({StreamletMsg::WAITING_FOR_VERSION});
SubProcess::Message versionMessage = subProcess->read();
if (versionMessage.code == StreamletMsg::VERSION_ACCEPTED && versionMessage.parameters[0] == version) {
subProcess->write({StreamletMsg::RELATION_START, relationName});
subProcess->write({StreamletMsg::INPUT_ATTRIBUTE_METADATA, L"path", L"string"});
for (string_t alias : field.getAliases()) subProcess->write({StreamletMsg::OUTPUT_ATTRIBUTE_ALIAS, alias});
for (int i = 0; i < field.options.size();) subProcess->write({StreamletMsg::OPTION, field.options[i++], field.options[i++]});
subProcess->write({StreamletMsg::WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA});
vector<AttributeMetadata> metadata;
while (true) {
SubProcess::Message m = subProcess->read();
if (m.code == StreamletMsg::OUTPUT_ATTRIBUTE_METADATA) metadata.push_back({m.parameters[0], writer->toTypeId(m.parameters[1])});
else if (m.code == StreamletMsg::WAITING_FOR_INPUT_ATTRIBUTES) break;
}
cachedMetadata[field.id] = metadata;
return metadata;
} else {
throw RelpipeWriterException(L"Incompatible exec sub-process version or message: " + versionMessage.toString());
}
}
} else {
return {};
}
}
virtual ~StreamletAttributeFinder() override {
for (auto s : subProcesses) {
try {
s.second->write({StreamletMsg::RELATION_END});
s.second->wait();
} catch (...) {
std::wcerr << L"Exception caught during closing sub-process #" + std::to_wstring(s.first) + L" and waiting for its end." << std::endl;
}
}
}
};
}
}
}