diff -r 2354c9058fb6 -r e87c231afb77 src/SchemeHandler.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/SchemeHandler.h Fri Sep 25 01:59:16 2020 +0200 @@ -0,0 +1,321 @@ +/** + * Relational pipes + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +#include "Configuration.h" +#include "SchemeException.h" + +namespace relpipe { +namespace tr { +namespace scheme { + +using namespace std; +using namespace relpipe; +using namespace relpipe::reader; +using namespace relpipe::reader::handlers; + +class SchemeHandler : public RelationalReaderValueHandler { +private: + std::wstring_convert> convertor; // TODO: support also other encodings or use always UTF-8 between C++ and Scheme + + Configuration configuration; + writer::RelationalWriter* relationalWriter; + + RelationConfiguration* currentRelationConfiguration = nullptr; + vector currentReaderMetadata; + vector currentWriterMetadata; + std::map currenVariablesMapping; + integer_t currentAttributeIndex = 0; + boolean_t includeCurrentRecord = false; + + void add(vector& readerAttributes, vector& writerAttributes) { + for (AttributeMetadata readerAttributes : readerAttributes) + writerAttributes.push_back({ + readerAttributes.getAttributeName(), + relationalWriter->toTypeId(readerAttributes.getTypeName()) + }); + } + + void generateVariableMappings() { + currenVariablesMapping.clear(); + for (AttributeMetadata m : currentReaderMetadata) currenVariablesMapping[m.getAttributeName()] = L""; + for (writer::AttributeMetadata m : currentWriterMetadata) currenVariablesMapping[m.attributeName] = L""; + + for (std::pair m : currenVariablesMapping) { + currenVariablesMapping[m.first] = escapeAwkVariableName(m.first); + } + } + + /** + * @param attributeName name from relational pipe + * @return variable name in Scheme + */ + string_t a2v(const string_t& attributeName) { + if (currenVariablesMapping.find(attributeName) != currenVariablesMapping.end()) return currenVariablesMapping[attributeName]; + else throw SchemeException(L"Unable to find value in currenVariablesMapping"); + } + + template bool containsValue(std::map map, V value) { // TODO: common function (Scheme, AWK) + for (std::pair p : map) if (p.second == value) return true; + return false; + } + + string_t escapeAwkVariableName(const string_t& attributeName, bool addPrefix = true) { + std::wregex badCharacters(L"\\s"); + string_t name = std::regex_replace(attributeName, badCharacters, L"-"); + + if (addPrefix) name = L"$" + name; // $ = standard attribute-variable prefix + + if (containsValue(currenVariablesMapping, name)) return escapeAwkVariableName(L"$" + name, false); // $ = different prefix added to distinguish two attributes with ambiguous names + else return name; + + } + + void debugVariableMapping(const string_t& relationName) { + relationalWriter->startRelation(relationName + L".variableMapping",{ + {L"attribute", writer::TypeId::STRING}, + {L"variable", writer::TypeId::STRING}, + }, true); + + for (std::pair m : currenVariablesMapping) { + relationalWriter->writeAttribute(m.first); + relationalWriter->writeAttribute(m.second); + } + } + + SCM toSchemeSymbol(const string_t& name) { + return scm_string_to_symbol(scm_from_locale_string(convertor.to_bytes(name).c_str())); + } + + /** + * @param code scheme source code e.g. (+ 1 2 3) or #t + * @param defaultReturnValue is returned if code is empty + * @return result of code execution or defaultReturnValue + */ + SCM evalSchemeCode(const string_t& code, SCM defaultReturnValue = SCM_BOOL_F) { + if (code.size()) return scm_eval_string(toSchemeValue(&code, typeid (string_t), TypeId::STRING)); + else return defaultReturnValue; + } + + SCM toSchemeValue(const void* value, const std::type_info& typeInfo, TypeId type) { + switch (type) { + case TypeId::BOOLEAN: + { + assert(typeInfo == typeid (boolean_t)); + auto* typedValue = static_cast (value); + return *typedValue ? SCM_BOOL_T : SCM_BOOL_F; + } + case TypeId::INTEGER: + { + assert(typeInfo == typeid (integer_t)); + auto* typedValue = static_cast (value); + return scm_from_int64(*typedValue); + } + case TypeId::STRING: + { + assert(typeInfo == typeid (string_t)); + auto* typedValue = static_cast (value); + return scm_from_locale_string(convertor.to_bytes(*typedValue).c_str()); + } + default: + throw cli::RelpipeCLIException(L"Unsupported type in toSchemeValue()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); + } + } + + void defineSchemeVariable(const string_t& name, const void* value, const std::type_info& typeInfo, TypeId type) { + scm_define(toSchemeSymbol(name), toSchemeValue(value, typeInfo, type)); + } + + /** + * TODO: use a common method + */ + bool parseBoolean(const string_t& value) { + if (value == L"true") return true; + else if (value == L"false") return false; + else throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS); + } + + void defineSchemeVariable(const DefinitionRecipe& definition) { + switch (relationalWriter->toTypeId(definition.type)) { + case writer::TypeId::BOOLEAN: + { + boolean_t value = parseBoolean(definition.value); + defineSchemeVariable(definition.name, &value, typeid (value), TypeId::BOOLEAN); + break; + } + case writer::TypeId::INTEGER: + { + integer_t value = stol(definition.value); + defineSchemeVariable(definition.name, &value, typeid (value), TypeId::INTEGER); + break; + } + case writer::TypeId::STRING: + { + defineSchemeVariable(definition.name, &definition.value, typeid (definition.value), TypeId::STRING); + break; + } + default: + throw cli::RelpipeCLIException(L"Unsupported type in defineSchemeVariable(): " + definition.type, cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); + } + } + + void undefineSchemeVariable(const string_t& name) { + scm_define(toSchemeSymbol(name), scm_make_undefined_variable()); // undefined != (define n) + // TODO: or use: scm_variable_unset_x() ? + } + + void writeSchemeValueToAttribute(const writer::AttributeMetadata& attribute) { + string_t variableName = a2v(attribute.attributeName); + SCM schemeValue = scm_eval_string(toSchemeValue(&variableName, typeid (variableName), TypeId::STRING)); + + switch (attribute.typeId) { + case writer::TypeId::BOOLEAN: + { + boolean_t value = scm_to_bool(schemeValue); + return relationalWriter->writeAttribute(&value, typeid (value)); + } + case writer::TypeId::INTEGER: + { + integer_t value = scm_to_int64(schemeValue); + return relationalWriter->writeAttribute(&value, typeid (value)); + } + case writer::TypeId::STRING: + { + char* ch = scm_to_locale_string(schemeValue); + string_t value = convertor.from_bytes(ch); + free(ch); + return relationalWriter->writeAttribute(&value, typeid (value)); + } + default: + throw cli::RelpipeCLIException(L"Unsupported type in writeSchemeValueToAttribute()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); + } + } + + /** + * Read from the Scheme variables and write to relational output stream. + */ + void writeCurrentRecord() { + for (auto attribute : currentWriterMetadata) writeSchemeValueToAttribute(attribute); + } + + void writeMoreRecords() { + while (scm_to_bool(evalSchemeCode(currentRelationConfiguration->schemeHasMoreRecords, SCM_BOOL_F))) writeCurrentRecord(); + } + +public: + + SchemeHandler(writer::RelationalWriter* relationalWriter, Configuration& configuration) : relationalWriter(relationalWriter), configuration(configuration) { + } + + void startRelation(string_t name, vector attributes) override { + if (currentRelationConfiguration) { + evalSchemeCode(currentRelationConfiguration->schemeAfterRecords); + writeMoreRecords(); + for (DefinitionRecipe definition : currentRelationConfiguration->definitions) undefineSchemeVariable(definition.name); + } + for (auto attribute : currentReaderMetadata) undefineSchemeVariable(attribute.getAttributeName()); + + for (DefinitionRecipe definition : configuration.definitions) defineSchemeVariable(definition); + + currentRelationConfiguration = nullptr; + for (int i = 0; i < configuration.relationConfigurations.size(); i++) { + if (regex_match(name, wregex(configuration.relationConfigurations[i].relation))) { + currentRelationConfiguration = &configuration.relationConfigurations[i]; + for (DefinitionRecipe definition : currentRelationConfiguration->definitions) defineSchemeVariable(definition); + break; // it there are multiple matches, only the first configuration is used + } + } + + currentReaderMetadata = attributes; + // TODO: move to a reusable method (or use same metadata on both reader and writer side?) + currentWriterMetadata.clear(); + if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) { + if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata); + currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end()); + if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata); + } else { + add(currentReaderMetadata, currentWriterMetadata); + } + + generateVariableMappings(); + + if (currentRelationConfiguration && currentRelationConfiguration->debugVariableMapping) debugVariableMapping(name); + + if (!currentRelationConfiguration || !currentRelationConfiguration->drop) relationalWriter->startRelation(name, currentWriterMetadata, true); + + if (currentRelationConfiguration) { + // TODO: better variable name, object, function? + defineSchemeVariable(L"relpipe-relation-name", &name, typeid (name), TypeId::STRING); + evalSchemeCode(currentRelationConfiguration->schemeBeforeRecords); + } + } + + void attribute(const void* value, const std::type_info& type) override { + if (currentRelationConfiguration) { + defineSchemeVariable(a2v(currentReaderMetadata[currentAttributeIndex].getAttributeName()), value, type, currentReaderMetadata[currentAttributeIndex].getTypeId()); + + currentAttributeIndex++; + + // TODO: > 0 ?: + if (currentAttributeIndex > 0 && currentAttributeIndex % currentReaderMetadata.size() == 0) { + evalSchemeCode(currentRelationConfiguration->schemeForEach); + includeCurrentRecord = scm_to_bool(evalSchemeCode(currentRelationConfiguration->schemeWhere, SCM_BOOL_T)); + if (includeCurrentRecord && !currentRelationConfiguration->drop) writeCurrentRecord(); + includeCurrentRecord = false; + writeMoreRecords(); + } + + currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size(); + } else { + relationalWriter->writeAttribute(value, type); + } + } + + void endOfPipe() { + if (currentRelationConfiguration) { + evalSchemeCode(currentRelationConfiguration->schemeAfterRecords); + writeMoreRecords(); + } + } + +}; + +} +} +}