src/StreamletAttributeFinder.h
author František Kučera <franta-hg@frantovo.cz>
Sun, 19 Jan 2020 00:57:22 +0100
branch v_0
changeset 46 b5ae61996281
parent 45 f466b4c7d9b1
child 47 beefddde951e
permissions -rw-r--r--
streamlets: fix command name collisions, do not prepend $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH to the $PATH (a streamlet may call a command with the same name as the streamlet name, which would lead to unwanted recursion)

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <cstdlib>
#include <codecvt>
#include <filesystem>
#include <iostream>
#include <locale>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

#include <relpipe/writer/typedefs.h>
#include <relpipe/writer/AttributeMetadata.h>
#include <relpipe/writer/RelationalWriter.h>

#include "RequestedField.h"
#include "SubProcess.h"
#include "AttributeFinder.h"
#include "StreamletMsg.h"

namespace relpipe {
namespace in {
namespace filesystem {

// Short alias for the standard filesystem library; used below for fs::path and fs::exists.
namespace fs = std::filesystem;
// Makes names from relpipe::writer usable without qualification in this namespace
// (presumably string_t, AttributeMetadata, RelationalWriter and RelpipeWriterException
// used below come from there – verify against the included relpipe/writer headers).
using namespace relpipe::writer;

class StreamletAttributeFinder : public AttributeFinder {
private:
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
	std::map<int, std::shared_ptr<SubProcess>> subProcesses;
	std::map<int, std::vector<AttributeMetadata>> cachedMetadata;

	string_t getStreamletCommand(const RequestedField& field) {
		const char* streamletPathChars = getenv("RELPIPE_IN_FILESYSTEM_STREAMLET_PATH");
		if (streamletPathChars) {
			std::wstringstream current;
			string_t streamletPath = convertor.from_bytes(streamletPathChars);
			for (int i = 0, streamletPathSize = streamletPath.size(); i < streamletPathSize; i++) {
				if (streamletPath[i] == ':' || i == streamletPathSize - 1) { // FIXME: support \: and \\ escaping
					current << L"/" << field.name;
					fs::path streamletFile(current.str());
					if (fs::exists(streamletFile)) { // FIXME: check executable bit
						return current.str();
					} else {
						current.str(L"");
						current.clear();
					}
				} else {
					current.put(streamletPath[i]);
				}
			}
			throw RelpipeWriterException(L"Streamlet „" + field.name + L"“ was not found at $RELPIPE_IN_FILESYSTEM_STREAMLET_PATH");
		} else {
			throw RelpipeWriterException(L"Missing environment variable RELPIPE_IN_FILESYSTEM_STREAMLET_PATH → unable to find streamlet.");
		}
	}

protected:

	void startFile(const fs::path& file, const string& fileRaw, bool exists) override {
		AttributeFinder::startFile(file, fileRaw, exists);
		if (exists) {
			for (auto subProcess : subProcesses) {
				subProcess.second->write({StreamletMsg::INPUT_ATTRIBUTE, L"0", convertor.from_bytes(currentFileRaw), L"false"}); // index, value, isNull
				subProcess.second->write({StreamletMsg::WAITING_FOR_OUTPUT_ATTRIBUTES});
			}
		}
	}

	virtual void writeFieldOfExistingFile(RelationalWriter* writer, const string_t& relationName, const RequestedField & field) override {
		// TODO: paralelize also over records → fork multiple processes and distribute records across them; then collect results (with a lock)
		if (field.group == RequestedField::GROUP_STREAMLET) {
			for (auto metadata : cachedMetadata[field.id]) {
				SubProcess::Message m = subProcesses[field.id]->read();
				if (m.code == StreamletMsg::OUTPUT_ATTRIBUTE) writer->writeAttribute(m.parameters[0]);
				else throw RelpipeWriterException(L"Protocol violation from exec sub-process while reading: „" + metadata.attributeName + L"“. Expected OUTPUT_ATTRIBUTE but got: " + m.toString());
			}

			SubProcess::Message m = subProcesses[field.id]->read();
			if (m.code != StreamletMsg::WAITING_FOR_INPUT_ATTRIBUTES) throw RelpipeWriterException(L"Protocol violation from exec sub-process. Expected WAITING_FOR_INPUT_ATTRIBUTES but got: " + m.toString());
			// TODO: generic protocol violation error messages / method for checking responses
		}
	}

public:

	static const string_t SCRIPT_PREFIX;

	virtual vector<AttributeMetadata> toMetadata(RelationalWriter* writer, const string_t& relationName, const RequestedField& field) override {
		if (field.group == RequestedField::GROUP_STREAMLET) {

			if (cachedMetadata.count(field.id)) {
				return cachedMetadata[field.id];
			} else {

				std::vector<string_t> commandLine = {getStreamletCommand(field)};
				std::map<string_t, string_t> environment;

				for (auto mn : StreamletMsg::getMessageNames()) {
					environment[L"EXEC_MSG_" + mn.second] = std::to_wstring(mn.first);
					environment[L"EXEC_MSG_" + std::to_wstring(mn.first)] = mn.second;
				}

				shared_ptr<SubProcess> subProcess(SubProcess::create(commandLine, environment));
				subProcesses[field.id] = subProcess;

				string_t version = L"1";
				subProcess->write({StreamletMsg::VERSION_SUPPORTED, version});
				subProcess->write({StreamletMsg::WAITING_FOR_VERSION});
				SubProcess::Message versionMessage = subProcess->read();
				if (versionMessage.code == StreamletMsg::VERSION_ACCEPTED && versionMessage.parameters[0] == version) {
					subProcess->write({StreamletMsg::RELATION_START, relationName});
					subProcess->write({StreamletMsg::INPUT_ATTRIBUTE_METADATA, L"path", L"string"});
					for (string_t alias : field.getAliases()) subProcess->write({StreamletMsg::OUTPUT_ATTRIBUTE_ALIAS, alias});
					for (int i = 0; i < field.options.size();) subProcess->write({StreamletMsg::OPTION, field.options[i++], field.options[i++]});
					subProcess->write({StreamletMsg::WAITING_FOR_OUTPUT_ATTRIBUTES_METADATA});

					vector<AttributeMetadata> metadata;
					while (true) {
						SubProcess::Message m = subProcess->read();
						if (m.code == StreamletMsg::OUTPUT_ATTRIBUTE_METADATA) metadata.push_back({m.parameters[0], writer->toTypeId(m.parameters[1])});
						else if (m.code == StreamletMsg::WAITING_FOR_INPUT_ATTRIBUTES) break;
					}

					cachedMetadata[field.id] = metadata;
					return metadata;
				} else {
					throw RelpipeWriterException(L"Incompatible exec sub-process version or message: " + versionMessage.toString());
				}
			}
		} else {
			return {};
		}
	}

	virtual ~StreamletAttributeFinder() override {
		for (auto s : subProcesses) {
			try {
				s.second->write({StreamletMsg::RELATION_END});
				s.second->wait();
			} catch (...) {
				std::wcerr << L"Exception caught during closing sub-process #" + std::to_wstring(s.first) + L" and waiting for its end." << std::endl;
			}
		}
	}
};

/**
 * Definition of the static member declared in the class.
 * It is an inline variable (C++17), because this definition lives in a header:
 * without „inline“, including the header from more than one translation unit would lead to multiple definitions at link time.
 */
inline const relpipe::writer::string_t StreamletAttributeFinder::SCRIPT_PREFIX = L"__relpipe_in_filesystem_script_";

}
}
}