src/GuileHandler.h
author František Kučera <franta-hg@frantovo.cz>
Tue, 22 Oct 2019 22:03:24 +0200
branch v_0
changeset 26 421608ecc12a
parent 23 6ee7a9e311e9
child 29 caee22a6370a
permissions -rw-r--r--
fix license version: GNU GPLv3

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <memory>
#include <string>
#include <vector>
#include <map>
#include <iostream>
#include <sstream>
#include <locale>
#include <codecvt>
#include <regex>
#include <assert.h>

#include <libguile.h>

#include <relpipe/reader/typedefs.h>
#include <relpipe/reader/TypeId.h>
#include <relpipe/reader/handlers/RelationalReaderValueHandler.h>
#include <relpipe/reader/handlers/AttributeMetadata.h>

#include <relpipe/writer/Factory.h>

#include <relpipe/cli/RelpipeCLIException.h>

#include "Configuration.h"
#include "GuileException.h"

namespace relpipe {
namespace tr {
namespace guile {

using namespace std;
using namespace relpipe;
using namespace relpipe::reader;
using namespace relpipe::reader::handlers;

/**
 * Relational stream handler that exposes the attributes of each incoming record as Guile variables,
 * evaluates the configured Guile expressions and writes the resulting records to the relational writer.
 *
 * Relations whose name matches no RelationConfiguration are passed through:
 * their metadata is copied to the writer and their attribute values are forwarded as they are.
 */
class GuileHandler : public RelationalReaderValueHandler {
private:
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or use always UTF-8 between C++ and Guile

	Configuration configuration;
	writer::RelationalWriter* relationalWriter;

	/** Configuration matching the relation that is currently being read; nullptr if there is no match. */
	RelationConfiguration* currentRelationConfiguration = nullptr;
	vector<AttributeMetadata> currentReaderMetadata;
	vector<writer::AttributeMetadata> currentWriterMetadata;
	/** attribute name (relational pipe) → variable name (Guile); rebuilt for each relation */
	std::map<string_t, string_t> currenVariablesMapping;
	/** index (within the current record) of the attribute that will arrive next */
	integer_t currentAttributeIndex = 0;
	boolean_t includeCurrentRecord = false;

	/**
	 * Appends the reader-side attributes, converted to writer-side metadata, to the given list.
	 *
	 * @param readerAttributes attributes of the input relation
	 * @param writerAttributes list to be extended
	 */
	void add(vector<AttributeMetadata>& readerAttributes, vector<writer::AttributeMetadata>& writerAttributes) {
		for (AttributeMetadata& readerAttribute : readerAttributes)
			writerAttributes.push_back({
				readerAttribute.getAttributeName(),
				relationalWriter->toTypeId(readerAttribute.getTypeName())
			});
	}

	/**
	 * Rebuilds the attribute → variable mapping from the current reader and writer metadata.
	 */
	void generateVariableMappings() {
		currenVariablesMapping.clear();
		// first pass: register all attribute names (with empty variable names)
		for (AttributeMetadata& m : currentReaderMetadata) currenVariablesMapping[m.getAttributeName()] = L"";
		for (const writer::AttributeMetadata& m : currentWriterMetadata) currenVariablesMapping[m.attributeName] = L"";

		// second pass: assign the variable names; escapeAwkVariableName() looks into the already assigned ones to avoid collisions
		for (auto& m : currenVariablesMapping) {
			m.second = escapeAwkVariableName(m.first);
		}
	}

	/**
	 * @param attributeName name from relational pipe
	 * @return variable name in Guile
	 * @throws GuileException if the attribute is not present in the current mapping
	 */
	string_t a2v(const string_t& attributeName) {
		auto found = currenVariablesMapping.find(attributeName);
		if (found != currenVariablesMapping.end()) return found->second;
		else throw GuileException(L"Unable to find value in currenVariablesMapping");
	}

	/**
	 * @param map map to be searched
	 * @param value value to be looked for
	 * @return true if at least one entry of the map has the given value
	 */
	template <typename K, typename V> bool containsValue(const std::map<K, V>& map, const V& value) { // TODO: common function (Guile, AWK)
		for (const auto& p : map) if (p.second == value) return true;
		return false;
	}

	/**
	 * Derives a Guile variable name from an attribute name: whitespace characters are replaced with "-"
	 * and a "$" prefix is added. If the resulting name is already used in the current mapping,
	 * additional "$" prefixes are added until the name is unique.
	 *
	 * @param attributeName name from relational pipe
	 * @param addPrefix whether the standard "$" prefix should be added (false in the recursive calls)
	 * @return variable name not yet present among the values of the current mapping
	 */
	string_t escapeAwkVariableName(const string_t& attributeName, bool addPrefix = true) {
		std::wregex badCharacters(L"\\s");
		string_t name = std::regex_replace(attributeName, badCharacters, L"-");

		if (addPrefix) name = L"$" + name; // $ = standard attribute-variable prefix

		if (containsValue(currenVariablesMapping, name)) return escapeAwkVariableName(L"$" + name, false); // $ = different prefix added to distinguish two attributes with ambiguous names
		else return name;

	}

	/**
	 * Writes the current attribute → variable mapping as a separate relation
	 * named "<relationName>.variableMapping" with two string attributes.
	 *
	 * @param relationName name of the relation that the mapping belongs to
	 */
	void debugVariableMapping(const string_t& relationName) {
		relationalWriter->startRelation(relationName + L".variableMapping",{
			{L"attribute", writer::TypeId::STRING},
			{L"variable", writer::TypeId::STRING},
		}, true);

		for (const auto& m : currenVariablesMapping) {
			relationalWriter->writeAttribute(m.first);
			relationalWriter->writeAttribute(m.second);
		}
	}

	/**
	 * @param name name of the symbol
	 * @return Guile symbol of the given name
	 */
	SCM toGuileSymbol(const string_t& name) {
		return scm_string_to_symbol(scm_from_locale_string(convertor.to_bytes(name).c_str()));
	}

	/**
	 * @param code guile source code e.g. (+ 1 2 3) or #t
	 * @param defaultReturnValue is returned if code is empty
	 * @return result of code execution or defaultReturnValue
	 */
	SCM evalGuileCode(const string_t& code, SCM defaultReturnValue = SCM_BOOL_F) {
		if (code.size()) return scm_eval_string(toGuileValue(&code, typeid (string_t), TypeId::STRING));
		else return defaultReturnValue;
	}

	/**
	 * Converts a value from the relational pipe to a Guile value.
	 *
	 * @param value pointer to a boolean_t, integer_t or string_t (according to the type)
	 * @param typeInfo C++ type of the value; only checked by assert()
	 * @param type relational type of the value
	 * @return Guile value
	 * @throws cli::RelpipeCLIException if the type is not supported
	 */
	SCM toGuileValue(const void* value, const std::type_info& typeInfo, TypeId type) {
		switch (type) {
			case TypeId::BOOLEAN:
			{
				assert(typeInfo == typeid (boolean_t));
				auto* typedValue = static_cast<const boolean_t*> (value);
				return *typedValue ? SCM_BOOL_T : SCM_BOOL_F;
			}
			case TypeId::INTEGER:
			{
				assert(typeInfo == typeid (integer_t));
				auto* typedValue = static_cast<const integer_t*> (value);
				// NOTE(review): assumes that integer_t is an unsigned 64-bit type — confirm against relpipe/reader/typedefs.h
				return scm_from_uint64(*typedValue);
			}
			case TypeId::STRING:
			{
				assert(typeInfo == typeid (string_t));
				auto* typedValue = static_cast<const string_t*> (value);
				return scm_from_locale_string(convertor.to_bytes(*typedValue).c_str());
			}
			default:
				throw cli::RelpipeCLIException(L"Unsupported type in toGuileValue()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
		}
	}

	/**
	 * Defines (or redefines) a Guile variable.
	 *
	 * @param name name of the Guile variable
	 * @param value see toGuileValue()
	 * @param typeInfo see toGuileValue()
	 * @param type see toGuileValue()
	 */
	void defineGuileVariable(const string_t& name, const void* value, const std::type_info& typeInfo, TypeId type) {
		scm_define(toGuileSymbol(name), toGuileValue(value, typeInfo, type));
	}

	/**
	 * TODO: use a common method
	 *
	 * @param value "true" or "false"
	 * @return parsed value
	 * @throws relpipe::cli::RelpipeCLIException if the value is neither "true" nor "false"
	 */
	bool parseBoolean(const string_t& value) {
		if (value == L"true") return true;
		else if (value == L"false") return false;
		else throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
	}

	/**
	 * Defines a Guile variable according to the recipe from the configuration:
	 * the textual value is parsed according to the declared type.
	 *
	 * @param definition name, type and textual value of the variable
	 * @throws cli::RelpipeCLIException if the type is not supported
	 */
	void defineGuileVariable(const DefinitionRecipe& definition) {
		switch (relationalWriter->toTypeId(definition.type)) {
			case writer::TypeId::BOOLEAN:
			{
				boolean_t value = parseBoolean(definition.value);
				defineGuileVariable(definition.name, &value, typeid (value), TypeId::BOOLEAN);
				break;
			}
			case writer::TypeId::INTEGER:
			{
				// NOTE(review): stoul() parses an unsigned long; confirm that it covers the intended range of integer_t
				integer_t value = stoul(definition.value);
				defineGuileVariable(definition.name, &value, typeid (value), TypeId::INTEGER);
				break;
			}
			case writer::TypeId::STRING:
			{
				defineGuileVariable(definition.name, &definition.value, typeid (definition.value), TypeId::STRING);
				break;
			}
			default:
				throw cli::RelpipeCLIException(L"Unsupported type in defineGuileVariable(): " + definition.type, cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
		}
	}

	/**
	 * @param name name of the Guile variable that should not hold its value anymore
	 */
	void undefineGuileVariable(const string_t& name) {
		scm_define(toGuileSymbol(name), scm_make_undefined_variable()); // undefined != (define n)
		// TODO: or use: scm_variable_unset_x() ?
	}

	/**
	 * Reads the Guile variable that belongs to the given attribute
	 * and writes its value (converted to the type of the attribute) to the relational output stream.
	 *
	 * @param attribute metadata of the output attribute
	 * @throws GuileException if there is no variable mapped to the attribute
	 * @throws cli::RelpipeCLIException if the type of the attribute is not supported
	 */
	void writeGuileValueToAttribute(const writer::AttributeMetadata& attribute) {
		string_t variableName = a2v(attribute.attributeName);
		// the variable name is evaluated as a Guile expression, which yields the value of the variable
		SCM guileValue = scm_eval_string(toGuileValue(&variableName, typeid (variableName), TypeId::STRING));

		switch (attribute.typeId) {
			case writer::TypeId::BOOLEAN:
			{
				boolean_t value = scm_to_bool(guileValue);
				relationalWriter->writeAttribute(&value, typeid (value));
				break;
			}
			case writer::TypeId::INTEGER:
			{
				integer_t value = scm_to_uint64(guileValue);
				relationalWriter->writeAttribute(&value, typeid (value));
				break;
			}
			case writer::TypeId::STRING:
			{
				// The buffer returned by Guile must be released by free();
				// the smart pointer does it even if the conversion or the writer throws an exception.
				std::unique_ptr<char, void (*)(void*) > ch(scm_to_locale_string(guileValue), free);
				string_t value = convertor.from_bytes(ch.get());
				relationalWriter->writeAttribute(&value, typeid (value));
				break;
			}
			default:
				throw cli::RelpipeCLIException(L"Unsupported type in writeGuileValueToAttribute()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
		}
	}

	/**
	 * Read from the Guile variables and write to relational output stream.
	 */
	void writeCurrentRecord() {
		for (const auto& attribute : currentWriterMetadata) writeGuileValueToAttribute(attribute);
	}

	/**
	 * Writes additional records as long as the guileHasMoreRecords expression evaluates to true.
	 * If the expression is empty, no additional record is written.
	 * Expects currentRelationConfiguration to be set (not nullptr).
	 */
	void writeMoreRecords() {
		while (scm_to_bool(evalGuileCode(currentRelationConfiguration->guileHasMoreRecords, SCM_BOOL_F))) writeCurrentRecord();
	}

public:

	/**
	 * @param relationalWriter writer for the output relations; only the pointer is stored
	 * @param configuration configuration of the transformation; a copy is stored
	 */
	GuileHandler(writer::RelationalWriter* relationalWriter, Configuration& configuration) : configuration(configuration), relationalWriter(relationalWriter) {
	}

	/**
	 * Finishes the previous relation, looks up the configuration for the new one,
	 * prepares the Guile variables and writes the header of the output relation (unless the relation is dropped).
	 *
	 * @param name name of the relation that starts
	 * @param attributes metadata of the attributes of the relation
	 */
	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
		if (currentRelationConfiguration) {
			// finish the previous relation
			evalGuileCode(currentRelationConfiguration->guileAfterRecords);
			writeMoreRecords();
			for (const DefinitionRecipe& definition : currentRelationConfiguration->definitions) undefineGuileVariable(definition.name);
		}
		// NOTE(review): the attribute values are defined under their mapped variable names (see a2v() in attribute()),
		// while the raw attribute names are undefined here — confirm whether a2v() should be used here too.
		for (auto& attribute : currentReaderMetadata) undefineGuileVariable(attribute.getAttributeName());

		// global definitions are (re)defined for each relation
		for (const DefinitionRecipe& definition : configuration.definitions) defineGuileVariable(definition);

		currentRelationConfiguration = nullptr;
		for (auto& relationConfiguration : configuration.relationConfigurations) {
			if (regex_match(name, wregex(relationConfiguration.relation))) {
				currentRelationConfiguration = &relationConfiguration;
				for (const DefinitionRecipe& definition : currentRelationConfiguration->definitions) defineGuileVariable(definition);
				break; // it there are multiple matches, only the first configuration is used
			}
		}

		currentReaderMetadata = attributes;
		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
		currentWriterMetadata.clear();
		if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) {
			if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata);
			currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end());
			if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata);
		} else {
			add(currentReaderMetadata, currentWriterMetadata);
		}

		generateVariableMappings();

		// the configuration is nullptr for relations that did not match – must be checked before dereferencing
		if (currentRelationConfiguration && currentRelationConfiguration->debugVariableMapping) debugVariableMapping(name);

		if (!currentRelationConfiguration || !currentRelationConfiguration->drop) relationalWriter->startRelation(name, currentWriterMetadata, true);

		if (currentRelationConfiguration) {
			// TODO: better variable name, object, function?
			defineGuileVariable(L"relpipe-relation-name", &name, typeid (name), TypeId::STRING);
			evalGuileCode(currentRelationConfiguration->guileBeforeRecords);
		}
	}

	/**
	 * Processes one attribute value. If the relation is configured, the value is stored in a Guile variable
	 * and once the record is complete, the Guile expressions are evaluated and the record is written
	 * (if the guileWhere expression is true or empty and the relation is not dropped).
	 * Otherwise the value is passed directly to the writer.
	 *
	 * @param value pointer to the value
	 * @param type C++ type of the value
	 */
	void attribute(const void* value, const std::type_info& type) override {
		if (currentRelationConfiguration) {
			defineGuileVariable(a2v(currentReaderMetadata[currentAttributeIndex].getAttributeName()), value, type, currentReaderMetadata[currentAttributeIndex].getTypeId());

			currentAttributeIndex++;

			// TODO: > 0 ?:
			if (currentAttributeIndex > 0 && currentAttributeIndex % currentReaderMetadata.size() == 0) {
				// the last attribute of the record has just arrived
				evalGuileCode(currentRelationConfiguration->guileForEach);
				includeCurrentRecord = scm_to_bool(evalGuileCode(currentRelationConfiguration->guileWhere, SCM_BOOL_T));
				if (includeCurrentRecord && !currentRelationConfiguration->drop) writeCurrentRecord();
				includeCurrentRecord = false;
				writeMoreRecords();
			}

			currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size();
		} else {
			relationalWriter->writeAttribute(value, type);
		}
	}

	/**
	 * Finishes the last relation (if it was configured):
	 * evaluates the guileAfterRecords expression and writes the additional records.
	 */
	void endOfPipe() {
		if (currentRelationConfiguration) {
			evalGuileCode(currentRelationConfiguration->guileAfterRecords);
			writeMoreRecords();
		}
	}

};

}
}
}