--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SchemeHandler.h Fri Sep 25 01:59:16 2020 +0200
@@ -0,0 +1,321 @@
+/**
+ * Relational pipes
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <map>
+#include <iostream>
+#include <sstream>
+#include <locale>
+#include <codecvt>
+#include <regex>
+#include <assert.h>
+
+#include <libguile.h>
+
+#include <relpipe/reader/typedefs.h>
+#include <relpipe/reader/TypeId.h>
+#include <relpipe/reader/handlers/RelationalReaderValueHandler.h>
+#include <relpipe/reader/handlers/AttributeMetadata.h>
+
+#include <relpipe/writer/Factory.h>
+
+#include <relpipe/cli/RelpipeCLIException.h>
+
+#include "Configuration.h"
+#include "SchemeException.h"
+
+namespace relpipe {
+namespace tr {
+namespace scheme {
+
+using namespace std;
+using namespace relpipe;
+using namespace relpipe::reader;
+using namespace relpipe::reader::handlers;
+
+class SchemeHandler : public RelationalReaderValueHandler {
+private:
+ std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // wchar_t <-> UTF-8 bytes, used for every string that crosses the C++/Scheme boundary; TODO: support also other encodings or use always UTF-8 between C++ and Scheme
+
+ Configuration configuration; // copy of the configuration passed to the constructor
+ writer::RelationalWriter* relationalWriter; // output stream; stored as a raw pointer (presumably owned by the caller -- confirm)
+
+ RelationConfiguration* currentRelationConfiguration = nullptr; // points into configuration.relationConfigurations; nullptr when no configuration matches the current relation
+ vector<AttributeMetadata> currentReaderMetadata; // attributes of the relation currently being read
+ vector<writer::AttributeMetadata> currentWriterMetadata; // attributes of the relation currently being written
+ std::map<string_t, string_t> currenVariablesMapping; // attribute name -> Scheme variable name; rebuilt by generateVariableMappings()
+ integer_t currentAttributeIndex = 0; // position of the next incoming attribute within the current record
+ boolean_t includeCurrentRecord = false; // result of the last schemeWhere evaluation; set back to false after each record
+
+ /**
+  * Appends the reader-side attributes to the writer-side metadata,
+  * translating each type name to the writer's type id.
+  *
+  * @param readerAttributes attributes of the input relation (only read here)
+  * @param writerAttributes output metadata; the converted attributes are appended to its end
+  */
+ void add(vector<AttributeMetadata>& readerAttributes, vector<writer::AttributeMetadata>& writerAttributes) {
+     // Bound by reference and given its own name: the loop variable used to shadow the parameter and copied every element.
+     for (AttributeMetadata& readerAttribute : readerAttributes) {
+         writerAttributes.push_back({
+             readerAttribute.getAttributeName(),
+             relationalWriter->toTypeId(readerAttribute.getTypeName())
+         });
+     }
+ }
+
+ /**
+  * Rebuilds currenVariablesMapping for the current relation: every reader and writer
+  * attribute name gets an entry whose value is the name of its Scheme variable.
+  */
+ void generateVariableMappings() {
+     currenVariablesMapping.clear();
+
+     // First pass: register every attribute name (reader side, then writer side) with an empty placeholder.
+     for (AttributeMetadata& readerAttribute : currentReaderMetadata) currenVariablesMapping[readerAttribute.getAttributeName()] = L"";
+     for (const writer::AttributeMetadata& writerAttribute : currentWriterMetadata) currenVariablesMapping[writerAttribute.attributeName] = L"";
+
+     // Second pass: replace the placeholders with the escaped variable names.
+     // Only values of existing keys are assigned, so the iteration stays valid.
+     for (auto entry = currenVariablesMapping.begin(); entry != currenVariablesMapping.end(); ++entry) {
+         entry->second = escapeAwkVariableName(entry->first);
+     }
+ }
+
+ /**
+  * Translates an attribute name to the name of its Scheme variable.
+  *
+  * @param attributeName name from relational pipe
+  * @return variable name in Scheme
+  * @throws SchemeException if the attribute has no entry in currenVariablesMapping
+  */
+ string_t a2v(const string_t& attributeName) {
+     auto found = currenVariablesMapping.find(attributeName);
+     if (found == currenVariablesMapping.end()) throw SchemeException(L"Unable to find value in currenVariablesMapping");
+     return found->second;
+ }
+
+ /**
+  * Tells whether any entry of the map holds the given value (linear search over the values).
+  * Map and value are taken by const reference so that a call does not copy the whole map.
+  *
+  * @param map map whose values are searched
+  * @param value value to look for
+  * @return true if at least one entry has this value, false otherwise (also for an empty map)
+  */
+ template <typename K, typename V> bool containsValue(const std::map<K, V>& map, const V& value) { // TODO: common function (Scheme, AWK)
+     for (const auto& entry : map) if (entry.second == value) return true;
+     return false;
+ }
+
+ /**
+  * Derives a Scheme variable name from an attribute name: each whitespace character
+  * becomes "-", and "$" prefixes are added until the name is not yet used as a value
+  * in currenVariablesMapping.
+  *
+  * @param attributeName name from relational pipe
+  * @param addPrefix whether to add the standard "$" prefix before checking for collisions
+  * @return variable name that does not collide with the names already present in the mapping
+  */
+ string_t escapeAwkVariableName(const string_t& attributeName, bool addPrefix = true) {
+     std::wregex whitespace(L"\\s");
+     string_t candidate = std::regex_replace(attributeName, whitespace, L"-");
+     if (addPrefix) candidate = L"$" + candidate; // $ = standard attribute-variable prefix
+
+     // $ = different prefix added to distinguish two attributes with ambiguous names
+     while (containsValue(currenVariablesMapping, candidate)) candidate = L"$" + candidate;
+
+     return candidate;
+ }
+
+ /**
+  * Writes the current attribute-to-variable mapping to the output as an extra relation
+  * named "<relationName>.variableMapping" with two string attributes.
+  *
+  * @param relationName name of the relation whose mapping is being dumped
+  */
+ void debugVariableMapping(const string_t& relationName) {
+     const string_t debugRelationName = relationName + L".variableMapping";
+     relationalWriter->startRelation(debugRelationName, {
+         {L"attribute", writer::TypeId::STRING},
+         {L"variable", writer::TypeId::STRING},
+     }, true);
+
+     // One record per mapping entry: attribute name first, then the variable name.
+     for (const auto& mapping : currenVariablesMapping) {
+         relationalWriter->writeAttribute(mapping.first);
+         relationalWriter->writeAttribute(mapping.second);
+     }
+ }
+
+ /**
+  * Creates the Scheme symbol with the given name.
+  *
+  * @param name symbol name
+  * @return the Scheme symbol
+  */
+ SCM toSchemeSymbol(const string_t& name) {
+     // convertor always produces UTF-8 bytes, so they are decoded as UTF-8 rather than in the locale encoding.
+     return scm_from_utf8_symbol(convertor.to_bytes(name).c_str());
+ }
+
+ /**
+  * Evaluates a piece of Scheme source code.
+  *
+  * @param code scheme source code e.g. (+ 1 2 3) or #t
+  * @param defaultReturnValue is returned if code is empty
+  * @return result of code execution or defaultReturnValue
+  */
+ SCM evalSchemeCode(const string_t& code, SCM defaultReturnValue = SCM_BOOL_F) {
+     if (code.empty()) return defaultReturnValue;
+
+     SCM schemeSource = toSchemeValue(&code, typeid (string_t), TypeId::STRING);
+     return scm_eval_string(schemeSource);
+ }
+
+ /**
+  * Converts a relpipe value to the corresponding Scheme value.
+  *
+  * @param value pointer to a boolean_t, integer_t or string_t, as selected by type
+  * @param typeInfo run-time type of the pointed value; only verified through assert()
+  * @param type relpipe type that selects the conversion
+  * @return Scheme boolean, integer or string
+  * @throws cli::RelpipeCLIException for any other type
+  */
+ SCM toSchemeValue(const void* value, const std::type_info& typeInfo, TypeId type) {
+     switch (type) {
+         case TypeId::BOOLEAN:
+         {
+             assert(typeInfo == typeid (boolean_t));
+             auto* typedValue = static_cast<const boolean_t*> (value);
+             return *typedValue ? SCM_BOOL_T : SCM_BOOL_F;
+         }
+         case TypeId::INTEGER:
+         {
+             assert(typeInfo == typeid (integer_t));
+             auto* typedValue = static_cast<const integer_t*> (value);
+             return scm_from_int64(*typedValue);
+         }
+         case TypeId::STRING:
+         {
+             assert(typeInfo == typeid (string_t));
+             auto* typedValue = static_cast<const string_t*> (value);
+             // convertor always produces UTF-8 bytes, so they are decoded as UTF-8 rather than in the locale encoding.
+             return scm_from_utf8_string(convertor.to_bytes(*typedValue).c_str());
+         }
+         default:
+             throw cli::RelpipeCLIException(L"Unsupported type in toSchemeValue()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
+     }
+ }
+
+ /**
+  * Defines a Scheme variable holding the given relpipe value.
+  *
+  * @param name name of the Scheme variable
+  * @param value pointer to the value; its type must correspond to type
+  * @param typeInfo run-time type of the pointed value
+  * @param type relpipe type of the value
+  */
+ void defineSchemeVariable(const string_t& name, const void* value, const std::type_info& typeInfo, TypeId type) {
+     SCM symbol = toSchemeSymbol(name);
+     SCM schemeValue = toSchemeValue(value, typeInfo, type);
+     scm_define(symbol, schemeValue);
+ }
+
+ /**
+  * Parses the textual form of a boolean; only the exact strings "true" and "false" are accepted.
+  * TODO: use a common method
+  *
+  * @param value text to parse
+  * @return the parsed boolean
+  * @throws relpipe::cli::RelpipeCLIException if the text is neither "true" nor "false"
+  */
+ bool parseBoolean(const string_t& value) {
+     const bool isTrue = value == L"true";
+     if (isTrue || value == L"false") return isTrue;
+
+     throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
+ }
+
+ /**
+  * Defines a Scheme variable from a configured definition (name, type name and textual value).
+  *
+  * @param definition recipe whose value is parsed according to its type
+  * @throws cli::RelpipeCLIException for an unparsable boolean or an unsupported type
+  * @throws std::invalid_argument, std::out_of_range (from std::stoll) for an unparsable or too large integer
+  */
+ void defineSchemeVariable(const DefinitionRecipe& definition) {
+     switch (relationalWriter->toTypeId(definition.type)) {
+         case writer::TypeId::BOOLEAN:
+         {
+             boolean_t value = parseBoolean(definition.value);
+             defineSchemeVariable(definition.name, &value, typeid (value), TypeId::BOOLEAN);
+             break;
+         }
+         case writer::TypeId::INTEGER:
+         {
+             // stoll instead of stol: long has only 32 bits on some platforms, while the value is passed to Scheme as a 64-bit integer.
+             integer_t value = std::stoll(definition.value);
+             defineSchemeVariable(definition.name, &value, typeid (value), TypeId::INTEGER);
+             break;
+         }
+         case writer::TypeId::STRING:
+         {
+             defineSchemeVariable(definition.name, &definition.value, typeid (definition.value), TypeId::STRING);
+             break;
+         }
+         default:
+             throw cli::RelpipeCLIException(L"Unsupported type in defineSchemeVariable(): " + definition.type, cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
+     }
+ }
+
+ /**
+  * Rebinds the given name to a fresh undefined Scheme variable.
+  *
+  * @param name name of the Scheme variable
+  */
+ void undefineSchemeVariable(const string_t& name) {
+     SCM symbol = toSchemeSymbol(name);
+     SCM undefined = scm_make_undefined_variable(); // undefined != (define n)
+     scm_define(symbol, undefined);
+     // TODO: or use: scm_variable_unset_x() ?
+ }
+
+ /**
+  * Reads the Scheme variable that belongs to the given attribute and writes its value
+  * to the relational output, converted to the attribute's type.
+  *
+  * @param attribute writer-side metadata (name and type) of the attribute to write
+  * @throws SchemeException (from a2v) if the attribute has no variable mapping
+  * @throws cli::RelpipeCLIException if the attribute type is not boolean, integer or string
+  */
+ void writeSchemeValueToAttribute(const writer::AttributeMetadata& attribute) {
+     string_t variableName = a2v(attribute.attributeName);
+     // The variable name is evaluated as Scheme code, which yields the variable's current value.
+     SCM schemeValue = scm_eval_string(toSchemeValue(&variableName, typeid (variableName), TypeId::STRING));
+
+     switch (attribute.typeId) {
+         case writer::TypeId::BOOLEAN:
+         {
+             boolean_t value = scm_to_bool(schemeValue);
+             relationalWriter->writeAttribute(&value, typeid (value));
+             break;
+         }
+         case writer::TypeId::INTEGER:
+         {
+             integer_t value = scm_to_int64(schemeValue);
+             relationalWriter->writeAttribute(&value, typeid (value));
+             break;
+         }
+         case writer::TypeId::STRING:
+         {
+             // convertor decodes UTF-8, so the bytes are requested as UTF-8 rather than in the locale encoding.
+             // The malloc'ed buffer is owned by a unique_ptr, so it is released even if from_bytes() throws.
+             std::unique_ptr<char, decltype(&free) > utf8Bytes(scm_to_utf8_string(schemeValue), &free);
+             string_t value = convertor.from_bytes(utf8Bytes.get());
+             relationalWriter->writeAttribute(&value, typeid (value));
+             break;
+         }
+         default:
+             throw cli::RelpipeCLIException(L"Unsupported type in writeSchemeValueToAttribute()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR);
+     }
+ }
+
+ /**
+  * Read from the Scheme variables and write to relational output stream:
+  * one value per attribute of currentWriterMetadata, in metadata order.
+  */
+ void writeCurrentRecord() {
+     for (const writer::AttributeMetadata& outputAttribute : currentWriterMetadata) {
+         writeSchemeValueToAttribute(outputAttribute);
+     }
+ }
+
+ /**
+  * Writes additional records for as long as the configured schemeHasMoreRecords code
+  * evaluates to true (empty code counts as false).
+  * Dereferences currentRelationConfiguration, so it must not be nullptr here.
+  */
+ void writeMoreRecords() {
+     for (;;) {
+         SCM hasMore = evalSchemeCode(currentRelationConfiguration->schemeHasMoreRecords, SCM_BOOL_F);
+         if (!scm_to_bool(hasMore)) break;
+         writeCurrentRecord();
+     }
+ }
+
+public:
+
+ /**
+  * @param relationalWriter output stream for the transformed relations; only the pointer is stored
+  * @param configuration transformation settings; copied into this handler
+  */
+ SchemeHandler(writer::RelationalWriter* relationalWriter, Configuration& configuration) : configuration(configuration), relationalWriter(relationalWriter) { // initializers listed in member declaration order (avoids -Wreorder)
+ }
+
+ /**
+  * Called when a new input relation starts: finishes the previous relation, resets the
+  * Scheme variables, selects the configuration for the new relation, computes the output
+  * metadata and starts the output relation (unless the relation is configured to be dropped).
+  *
+  * @param name name of the relation that is starting
+  * @param attributes metadata of its attributes
+  */
+ void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
+     // Finish the previous relation (if it had a configuration) before anything is reset.
+     if (currentRelationConfiguration) {
+         evalSchemeCode(currentRelationConfiguration->schemeAfterRecords);
+         writeMoreRecords();
+         for (const DefinitionRecipe& definition : currentRelationConfiguration->definitions) undefineSchemeVariable(definition.name);
+     }
+     // attribute() defines the values under the mapped variable names (a2v), so the same names are undefined here.
+     // At this point currenVariablesMapping still describes the previous relation.
+     for (AttributeMetadata& attribute : currentReaderMetadata) undefineSchemeVariable(a2v(attribute.getAttributeName()));
+
+     // Global definitions are (re)defined for every relation.
+     for (const DefinitionRecipe& definition : configuration.definitions) defineSchemeVariable(definition);
+
+     // Select the configuration whose regular expression matches the whole relation name.
+     currentRelationConfiguration = nullptr;
+     for (RelationConfiguration& candidate : configuration.relationConfigurations) {
+         if (regex_match(name, wregex(candidate.relation))) {
+             currentRelationConfiguration = &candidate;
+             for (const DefinitionRecipe& definition : candidate.definitions) defineSchemeVariable(definition);
+             break; // if there are multiple matches, only the first configuration is used
+         }
+     }
+
+     currentReaderMetadata = attributes;
+     // TODO: move to a reusable method (or use same metadata on both reader and writer side?)
+     currentWriterMetadata.clear();
+     if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) {
+         // Explicit output attributes, optionally surrounded by the input attributes.
+         if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata);
+         currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end());
+         if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata);
+     } else {
+         // No explicit output attributes: the output has the same attributes as the input.
+         add(currentReaderMetadata, currentWriterMetadata);
+     }
+
+     generateVariableMappings();
+
+     if (currentRelationConfiguration && currentRelationConfiguration->debugVariableMapping) debugVariableMapping(name);
+
+     if (!currentRelationConfiguration || !currentRelationConfiguration->drop) relationalWriter->startRelation(name, currentWriterMetadata, true);
+
+     if (currentRelationConfiguration) {
+         // TODO: better variable name, object, function?
+         defineSchemeVariable(L"relpipe-relation-name", &name, typeid (name), TypeId::STRING);
+         evalSchemeCode(currentRelationConfiguration->schemeBeforeRecords);
+     }
+ }
+
+ /**
+  * Called for each attribute value of the input. Without a configuration for the current
+  * relation the value is passed through unchanged. Otherwise it is stored in its Scheme
+  * variable, and once the last attribute of a record has arrived, the schemeForEach and
+  * schemeWhere code is evaluated and the record is written if it passed the filter and the
+  * relation is not dropped.
+  *
+  * @param value pointer to the attribute value
+  * @param type run-time type of the pointed value
+  */
+ void attribute(const void* value, const std::type_info& type) override {
+     if (!currentRelationConfiguration) {
+         relationalWriter->writeAttribute(value, type);
+         return;
+     }
+
+     AttributeMetadata& metadata = currentReaderMetadata[currentAttributeIndex];
+     defineSchemeVariable(a2v(metadata.getAttributeName()), value, type, metadata.getTypeId());
+     currentAttributeIndex++;
+
+     // TODO: > 0 ?:
+     const bool recordComplete = currentAttributeIndex > 0 && currentAttributeIndex % currentReaderMetadata.size() == 0;
+     if (recordComplete) {
+         evalSchemeCode(currentRelationConfiguration->schemeForEach);
+         includeCurrentRecord = scm_to_bool(evalSchemeCode(currentRelationConfiguration->schemeWhere, SCM_BOOL_T));
+         if (includeCurrentRecord && !currentRelationConfiguration->drop) writeCurrentRecord();
+         includeCurrentRecord = false;
+         writeMoreRecords();
+     }
+
+     // Wrap around to the first attribute once a record is complete.
+     currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size();
+ }
+
+ /**
+  * Called at the end of the input: if the last relation had a configuration, evaluates its
+  * schemeAfterRecords code and writes any additional records.
+  */
+ void endOfPipe() {
+     if (!currentRelationConfiguration) return;
+
+     evalSchemeCode(currentRelationConfiguration->schemeAfterRecords);
+     writeMoreRecords();
+ }
+
+};
+
+}
+}
+}