src/SchemeHandler.h
branchv_0
changeset 33 e87c231afb77
parent 30 3ce384eaad5c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SchemeHandler.h	Fri Sep 25 01:59:16 2020 +0200
@@ -0,0 +1,321 @@
+/**
+ * Relational pipes
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <map>
+#include <iostream>
+#include <sstream>
+#include <locale>
+#include <codecvt>
+#include <regex>
+#include <assert.h>
+
+#include <libguile.h>
+
+#include <relpipe/reader/typedefs.h>
+#include <relpipe/reader/TypeId.h>
+#include <relpipe/reader/handlers/RelationalReaderValueHandler.h>
+#include <relpipe/reader/handlers/AttributeMetadata.h>
+
+#include <relpipe/writer/Factory.h>
+
+#include <relpipe/cli/RelpipeCLIException.h>
+
+#include "Configuration.h"
+#include "SchemeException.h"
+
+namespace relpipe {
+namespace tr {
+namespace scheme {
+
+using namespace std;
+using namespace relpipe;
+using namespace relpipe::reader;
+using namespace relpipe::reader::handlers;
+
+/**
+ * Relational handler that transforms relations using GNU Guile (Scheme) code taken from the Configuration.
+ * Attribute values of each record are defined as Scheme variables, the configured expressions are evaluated
+ * and the output record is read back from the Scheme variables and passed to the RelationalWriter.
+ * Relations that match no RelationConfiguration are written to the output with their values unchanged.
+ * Strings cross the C++/Scheme boundary as UTF-8, which is what the convertor produces and consumes.
+ */
+class SchemeHandler : public RelationalReaderValueHandler {
+private:
+	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // wide string <-> UTF-8 bytes
+
+	Configuration configuration;
+	writer::RelationalWriter* relationalWriter; // never deleted by this class
+
+	RelationConfiguration* currentRelationConfiguration = nullptr; // points into configuration; nullptr = pass-through
+	vector<AttributeMetadata> currentReaderMetadata;
+	vector<writer::AttributeMetadata> currentWriterMetadata;
+	std::map<string_t, string_t> currenVariablesMapping; // attribute name -> Scheme variable name
+	integer_t currentAttributeIndex = 0;
+	boolean_t includeCurrentRecord = false;
+
+	/** Appends writer metadata (same name, type resolved by the writer) for every reader attribute. */
+	void add(vector<AttributeMetadata>& readerAttributes, vector<writer::AttributeMetadata>& writerAttributes) {
+		for (AttributeMetadata& readerAttribute : readerAttributes)
+			writerAttributes.push_back({
+				readerAttribute.getAttributeName(),
+				relationalWriter->toTypeId(readerAttribute.getTypeName())
+			});
+	}
+
+	/** Rebuilds the attribute name -> Scheme variable name mapping from the current reader and writer metadata. */
+	void generateVariableMappings() {
+		currenVariablesMapping.clear();
+		for (AttributeMetadata& m : currentReaderMetadata) currenVariablesMapping[m.getAttributeName()] = L"";
+		for (const writer::AttributeMetadata& m : currentWriterMetadata) currenVariablesMapping[m.attributeName] = L"";
+
+		// All keys exist at this point, so only values are assigned; escapeAwkVariableName() checks the names assigned so far.
+		for (auto& mapping : currenVariablesMapping) mapping.second = escapeAwkVariableName(mapping.first);
+	}
+
+	/**
+	 * @param attributeName name from relational pipe
+	 * @return variable name in Scheme
+	 * @throws SchemeException if there is no mapping for the attribute
+	 */
+	string_t a2v(const string_t& attributeName) {
+		auto found = currenVariablesMapping.find(attributeName);
+		if (found != currenVariablesMapping.end()) return found->second;
+		else throw SchemeException(L"Unable to find value in currenVariablesMapping");
+	}
+
+	/** @return true if any entry of the map holds the given value (the map is not copied) */
+	template <typename K, typename V> bool containsValue(const std::map<K, V>& map, const V& value) { // TODO: common function (Scheme, AWK)
+		for (const auto& p : map) if (p.second == value) return true;
+		return false;
+	}
+
+	/**
+	 * Derives a Scheme variable name from an attribute name: whitespace is replaced by "-" and "$" is prepended.
+	 * Another "$" is prepended for as long as the name equals a variable name that is already in the mapping.
+	 */
+	string_t escapeAwkVariableName(const string_t& attributeName, bool addPrefix = true) {
+		static const std::wregex badCharacters(L"\\s"); // compiled once instead of on every call
+		string_t name = std::regex_replace(attributeName, badCharacters, L"-");
+
+		if (addPrefix) name = L"$" + name; // $ = standard attribute-variable prefix
+
+		if (containsValue(currenVariablesMapping, name)) return escapeAwkVariableName(L"$" + name, false); // $ = different prefix added to distinguish two attributes with ambiguous names
+		else return name;
+	}
+
+	/** Writes the current attribute -> variable mapping to the output as relation "<relationName>.variableMapping". */
+	void debugVariableMapping(const string_t& relationName) {
+		relationalWriter->startRelation(relationName + L".variableMapping",{
+			{L"attribute", writer::TypeId::STRING},
+			{L"variable", writer::TypeId::STRING},
+		}, true);
+
+		for (std::pair<string_t, string_t> m : currenVariablesMapping) {
+			relationalWriter->writeAttribute(m.first);
+			relationalWriter->writeAttribute(m.second);
+		}
+	}
+
+	/** @return Scheme symbol of the given name; the name is handed over to Guile as UTF-8 */
+	SCM toSchemeSymbol(const string_t& name) {
+		return scm_string_to_symbol(scm_from_utf8_string(convertor.to_bytes(name).c_str()));
+	}
+
+	/**
+	 * @param code scheme source code e.g. (+ 1 2 3) or #t
+	 * @param defaultReturnValue is returned if code is empty
+	 * @return result of code execution or defaultReturnValue
+	 */
+	SCM evalSchemeCode(const string_t& code, SCM defaultReturnValue = SCM_BOOL_F) {
+		if (code.size()) return scm_eval_string(toSchemeValue(&code, typeid (string_t), TypeId::STRING));
+		else return defaultReturnValue;
+	}
+
+	/**
+	 * Converts a typed relational value to a Scheme value.
+	 * @param value pointer to boolean_t, integer_t or string_t according to the type
+	 * @param typeInfo C++ type of the value; only checked by assert
+	 * @param type relational type of the value
+	 * @throws cli::RelpipeCLIException for an unsupported type
+	 */
+	SCM toSchemeValue(const void* value, const std::type_info& typeInfo, TypeId type) {
+		switch (type) {
+			case TypeId::BOOLEAN: {
+				assert(typeInfo == typeid (boolean_t));
+				return *static_cast<const boolean_t*> (value) ? SCM_BOOL_T : SCM_BOOL_F;
+			}
+			case TypeId::INTEGER: {
+				assert(typeInfo == typeid (integer_t));
+				return scm_from_int64(*static_cast<const integer_t*> (value));
+			}
+			case TypeId::STRING: {
+				assert(typeInfo == typeid (string_t));
+				// The convertor yields UTF-8 bytes, so they must be decoded as UTF-8 regardless of the current locale.
+				return scm_from_utf8_string(convertor.to_bytes(*static_cast<const string_t*> (value)).c_str());
+			}
+			default:
+				throw cli::RelpipeCLIException(L"Unsupported type in toSchemeValue()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
+		}
+	}
+
+	/** Defines (or redefines) a Scheme variable of the given name holding the converted value. */
+	void defineSchemeVariable(const string_t& name, const void* value, const std::type_info& typeInfo, TypeId type) {
+		scm_define(toSchemeSymbol(name), toSchemeValue(value, typeInfo, type));
+	}
+
+	/**
+	 * TODO: use a common method
+	 * @throws relpipe::cli::RelpipeCLIException if the value is neither "true" nor "false"
+	 */
+	bool parseBoolean(const string_t& value) {
+		if (value == L"true") return true;
+		else if (value == L"false") return false;
+		else throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
+	}
+
+	/**
+	 * Defines a Scheme variable from a configured definition; its textual value is parsed according to the declared type.
+	 * @throws cli::RelpipeCLIException for an unsupported type or an unparsable boolean
+	 */
+	void defineSchemeVariable(const DefinitionRecipe& definition) {
+		switch (relationalWriter->toTypeId(definition.type)) {
+			case writer::TypeId::BOOLEAN: {
+				boolean_t value = parseBoolean(definition.value);
+				defineSchemeVariable(definition.name, &value, typeid (value), TypeId::BOOLEAN);
+				break;
+			}
+			case writer::TypeId::INTEGER: {
+				integer_t value = stoll(definition.value); // stoll: long may be narrower than the 64 bits passed to scm_from_int64()
+				defineSchemeVariable(definition.name, &value, typeid (value), TypeId::INTEGER);
+				break;
+			}
+			case writer::TypeId::STRING: {
+				defineSchemeVariable(definition.name, &definition.value, typeid (definition.value), TypeId::STRING);
+				break;
+			}
+			default:
+				throw cli::RelpipeCLIException(L"Unsupported type in defineSchemeVariable(): " + definition.type, cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
+		}
+	}
+
+	/** Binds the name to the result of scm_make_undefined_variable(). */
+	void undefineSchemeVariable(const string_t& name) {
+		scm_define(toSchemeSymbol(name), scm_make_undefined_variable()); // undefined != (define n)
+		// TODO: or use: scm_variable_unset_x() ?
+	}
+
+	/**
+	 * Evaluates the Scheme variable mapped to the attribute and writes its value, converted to the attribute type, to the output.
+	 * NOTE(review): the variable name is evaluated as Scheme source, so names containing Scheme syntax characters may not read back - confirm.
+	 * @throws cli::RelpipeCLIException for an unsupported attribute type
+	 */
+	void writeSchemeValueToAttribute(const writer::AttributeMetadata& attribute) {
+		string_t variableName = a2v(attribute.attributeName);
+		SCM schemeValue = scm_eval_string(toSchemeValue(&variableName, typeid (variableName), TypeId::STRING));
+
+		switch (attribute.typeId) {
+			case writer::TypeId::BOOLEAN: {
+				boolean_t value = scm_to_bool(schemeValue);
+				return relationalWriter->writeAttribute(&value, typeid (value));
+			}
+			case writer::TypeId::INTEGER: {
+				integer_t value = scm_to_int64(schemeValue);
+				return relationalWriter->writeAttribute(&value, typeid (value));
+			}
+			case writer::TypeId::STRING: {
+				// The returned buffer must be released with free(); the guard does so even if the conversion throws.
+				std::unique_ptr<char, decltype(&free)> utf8(scm_to_utf8_string(schemeValue), &free);
+				string_t value = convertor.from_bytes(utf8.get());
+				return relationalWriter->writeAttribute(&value, typeid (value));
+			}
+			default:
+				throw cli::RelpipeCLIException(L"Unsupported type in writeSchemeValueToAttribute()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
+		}
+	}
+
+	/**
+	 * Read from the Scheme variables and write to relational output stream.
+	 */
+	void writeCurrentRecord() {
+		for (const writer::AttributeMetadata& attribute : currentWriterMetadata) writeSchemeValueToAttribute(attribute);
+	}
+
+	/**
+	 * Writes additional records for as long as the configured schemeHasMoreRecords expression evaluates to true.
+	 * NOTE(review): unlike attribute(), this does not check the drop flag - confirm that is intended.
+	 */
+	void writeMoreRecords() {
+		while (scm_to_bool(evalSchemeCode(currentRelationConfiguration->schemeHasMoreRecords, SCM_BOOL_F))) writeCurrentRecord();
+	}
+
+public:
+
+	/** The configuration is copied; the writer pointer is stored and used for all output. */
+	SchemeHandler(writer::RelationalWriter* relationalWriter, Configuration& configuration) : configuration(configuration), relationalWriter(relationalWriter) { // initializers follow the declaration order
+	}
+
+	/**
+	 * Finishes the previous relation (after-records code, extra records, clean-up of its variables),
+	 * selects the first configuration whose regular expression matches the relation name,
+	 * derives the output metadata and starts the output relation unless it is dropped.
+	 */
+	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
+		if (currentRelationConfiguration) {
+			evalSchemeCode(currentRelationConfiguration->schemeAfterRecords);
+			writeMoreRecords();
+			for (const DefinitionRecipe& definition : currentRelationConfiguration->definitions) undefineSchemeVariable(definition.name);
+		}
+		// attribute() defines the values under their mapped variable names, so the same names are undefined here
+		// (undefining the raw attribute name would leave the real variable in place and rebind an unrelated symbol).
+		// The mapping still belongs to the previous relation at this point and contains all its reader attributes.
+		for (AttributeMetadata& previousAttribute : currentReaderMetadata) undefineSchemeVariable(a2v(previousAttribute.getAttributeName()));
+
+		for (const DefinitionRecipe& definition : configuration.definitions) defineSchemeVariable(definition);
+
+		currentRelationConfiguration = nullptr;
+		for (size_t i = 0; i < configuration.relationConfigurations.size(); i++) {
+			if (regex_match(name, wregex(configuration.relationConfigurations[i].relation))) {
+				currentRelationConfiguration = &configuration.relationConfigurations[i];
+				for (const DefinitionRecipe& definition : currentRelationConfiguration->definitions) defineSchemeVariable(definition);
+				break; // if there are multiple matches, only the first configuration is used
+			}
+		}
+
+		currentReaderMetadata = attributes;
+		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
+		currentWriterMetadata.clear();
+		if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) {
+			if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata);
+			currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end());
+			if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata);
+		} else {
+			add(currentReaderMetadata, currentWriterMetadata);
+		}
+
+		generateVariableMappings();
+
+		if (currentRelationConfiguration && currentRelationConfiguration->debugVariableMapping) debugVariableMapping(name);
+
+		if (!currentRelationConfiguration || !currentRelationConfiguration->drop) relationalWriter->startRelation(name, currentWriterMetadata, true);
+
+		if (currentRelationConfiguration) {
+			// TODO: better variable name, object, function?
+			defineSchemeVariable(L"relpipe-relation-name", &name, typeid (name), TypeId::STRING);
+			evalSchemeCode(currentRelationConfiguration->schemeBeforeRecords);
+		}
+	}
+
+	/**
+	 * Receives one attribute value. For a configured relation the value is defined as a Scheme variable and, once the
+	 * record is complete, the for-each and where expressions are evaluated (an empty where expression counts as true)
+	 * and the record is written if it passes the condition and the relation is not dropped.
+	 * For relations without configuration the value is written to the output unchanged.
+	 */
+	void attribute(const void* value, const std::type_info& type) override {
+		if (currentRelationConfiguration) {
+			defineSchemeVariable(a2v(currentReaderMetadata[currentAttributeIndex].getAttributeName()), value, type, currentReaderMetadata[currentAttributeIndex].getTypeId());
+
+			currentAttributeIndex++;
+
+			// TODO: > 0 ?:
+			if (currentAttributeIndex > 0 && currentAttributeIndex % currentReaderMetadata.size() == 0) {
+				evalSchemeCode(currentRelationConfiguration->schemeForEach);
+				includeCurrentRecord = scm_to_bool(evalSchemeCode(currentRelationConfiguration->schemeWhere, SCM_BOOL_T));
+				if (includeCurrentRecord && !currentRelationConfiguration->drop) writeCurrentRecord();
+				includeCurrentRecord = false;
+				writeMoreRecords();
+			}
+
+			currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size();
+		} else {
+			relationalWriter->writeAttribute(value, type);
+		}
+	}
+
+	/** Finishes the last relation: evaluates its after-records code and writes any remaining records. */
+	void endOfPipe() {
+		if (currentRelationConfiguration) {
+			evalSchemeCode(currentRelationConfiguration->schemeAfterRecords);
+			writeMoreRecords();
+		}
+	}
+
+};
+
+}
+}
+}