src/SQLHandler.h
author František Kučera <franta-hg@frantovo.cz>
Tue, 13 Dec 2022 02:07:00 +0100
branch v_0
changeset 3 202ce847990c
parent 2 8a30971d285f
permissions -rw-r--r--
configuration: --write-ddl, --write-dml, --write-column-names, --insert-mode, --type-cast

/**
 * Relational pipes
 * Copyright © 2022 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <memory>
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include <locale>
#include <codecvt>
#include <regex>

#include <relpipe/reader/typedefs.h>
#include <relpipe/reader/TypeId.h>
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
#include <relpipe/reader/handlers/AttributeMetadata.h>

#include "Configuration.h"
#include "RelpipeSQLWriterException.h"

namespace relpipe {
namespace out {
namespace sql {

using namespace relpipe;
using namespace relpipe::reader;
using namespace relpipe::reader::handlers;

/**
 * Relational reader handler that writes the relations it receives as SQL text:
 * CREATE TABLE statements (DDL) and INSERT statements (DML), as allowed by the configuration.
 */
class SQLHandler : public RelationalReaderStringHandler {
private:
	/** stream the generated SQL is written to */
	std::ostream& output;
	/** per-relation options: what to write, insert mode, type casts */
	Configuration& configuration;
	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // generate SQL always in UTF-8
	/** attributes of the relation currently being processed (set in startRelation()) */
	std::vector<AttributeMetadata> currentAttributes;
	/** number of values already written in the current record; reset to 0 when the record is complete */
	integer_t valueCount = 0;
	/** number of records of the current relation whose output has been started */
	integer_t recordCount = 0;
	/** name of the relation currently being processed; empty before the first relation */
	string_t currentRelation;

	/**
	 * @param a
	 * @param b
	 * @return true if relations have same number and types of attributes (names may differ)
	 */
	bool matches(const std::vector<AttributeMetadata>& a, const std::vector<AttributeMetadata>& b) {
		if (a.size() != b.size()) return false;
		for (int i = 0, limit = a.size(); i < limit; i++) if (a[i].getTypeId() != b[i].getTypeId()) return false;
		return true;
	}

	/**
	 * Writes an SQL identifier enclosed in double quotes.
	 * Each double quote inside the identifier is doubled, so the name cannot terminate the quoting.
	 *
	 * @param output stream to write to
	 * @param identifier raw (unquoted) name, e.g. of a table or a column
	 */
	static void writeIdentifier(std::ostream& output, std::string identifier) {
		const char quote = '"';
		output << quote;
		for (const char c : identifier) {
			output << c;
			if (c == quote) output << quote; // escape by doubling
		}
		output << quote;
	}

	/**
	 * Writes an SQL type name.
	 * A plain lower-case type with optional numeric parameters, e.g. integer, varchar(32) or numeric(10, 2),
	 * is written verbatim; anything else is quoted like an identifier.
	 *
	 * @param output stream to write to
	 * @param type SQL type name
	 */
	static void writeType(std::ostream& output, std::string type) {
		// compiled once and reused: constructing a std::regex on every call is needlessly expensive
		static const std::regex plainType("[a-z0-9]+(\\([0-9]+(,\\s*[0-9]+)*\\))?");
		if (std::regex_match(type, plainType)) output << type;
		else writeIdentifier(output, type);
	}

	/**
	 * Writes a value as an SQL string literal enclosed in apostrophes.
	 * Each apostrophe inside the value is doubled, so the value cannot terminate the literal.
	 *
	 * @param output stream to write to
	 * @param value raw (unquoted) text of the value
	 */
	static void writeValue(std::ostream& output, std::string value) {
		const char apostrophe = '\'';
		output << apostrophe;
		for (const char c : value) {
			output << c;
			if (c == apostrophe) output << apostrophe; // escape by doubling
		}
		output << apostrophe;
	}

	/**
	 * Placeholder for writing the number of records of the current relation as an SQL comment.
	 * It currently writes nothing, for the reason given in the body.
	 */
	void writeRecordCount() {
		// currently disabled due to relpipe-in-sql parser issues with last comment without any following expression
		// output << "-- Record count: " << recordCount << std::endl;
	}

	void endRelation() {
		if (getWriteDML()) {
			if (getInsertMode() == Configuration::InsertMode::MULTI) {
				output << std::endl << ";" << std::endl;
			}

			writeRecordCount();
		}
	}

	// TODO: code deduplication

	/**
	 * Looks up the insert mode configured for the current relation.
	 * Relation configurations are scanned in order; the first one whose pattern matches the relation name
	 * and whose mode is not DEFAULT wins.
	 *
	 * @return the configured mode, or MULTI if no matching configuration specifies one
	 */
	Configuration::InsertMode getInsertMode() {
		for (const auto& candidate : configuration.relationConfigurations) {
			const bool relationMatches = std::regex_match(currentRelation, candidate.relationPattern);
			if (relationMatches && candidate.insertMode != Configuration::InsertMode::DEFAULT) {
				return candidate.insertMode;
			}
		}
		return Configuration::InsertMode::MULTI;
	}

	// TODO: code deduplication

	bool getWriteDDL() {
		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
			if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDDL;
		}
		return true;
	}

	// TODO: code deduplication

	bool getWriteDML() {
		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
			if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDML;
		}
		return true;
	}

	// TODO: code deduplication

	bool getWriteColumnNames() {
		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
			if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeColumnNames;
		}
		return true;
	}

	/**
	 * Determines the SQL type used for a column in the CREATE TABLE statement.
	 * User-provided type cast rules of every relation configuration matching the current relation are tried first,
	 * in order; the first rule whose attribute-name and type-name patterns both match wins.
	 * Otherwise the built-in mapping applies: boolean to integer, integer to bigint, string to text.
	 *
	 * @param attribute metadata of the attribute (column)
	 * @return name of the SQL type
	 * @throws RelpipeSQLWriterException if no rule matches and the type has no built-in mapping
	 */
	string_t findSQLType(const AttributeMetadata& attribute) {
		// Type casts provided by the user:
		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
			if (!std::regex_match(currentRelation, rc.relationPattern)) continue;
			// iterate by reference: a by-value loop variable would copy each rule, including its patterns
			for (const Configuration::TypeCastRule& tcr : rc.typeCastRules) {
				if (std::regex_match(attribute.getAttributeName(), tcr.attribute) && std::regex_match(attribute.getTypeName(), tcr.type)) return tcr.sqlType;
			}
		}

		// Default mapping:
		const auto typeId = attribute.getTypeId();
		if (typeId == reader::TypeId::BOOLEAN) return L"integer";
		if (typeId == reader::TypeId::INTEGER) return L"bigint";
		if (typeId == reader::TypeId::STRING) return L"text";
		throw RelpipeSQLWriterException(L"Type not yet supported in the SQL output: " + attribute.getTypeName());
	}

public:

	/**
	 * @param output stream the generated SQL is written to; stored by reference
	 * @param configuration options consulted while writing; stored by reference
	 */
	SQLHandler(std::ostream& output, Configuration& configuration) : output(output), configuration(configuration) {
	}

	/**
	 * Starts a new relation: finishes the previous one (if any), remembers the name and attributes of the new one
	 * and, if DDL output is enabled for it, writes its CREATE TABLE statement.
	 *
	 * @param name name of the relation, used as the table name
	 * @param attributes attributes of the relation, used as the table columns
	 */
	void startRelation(string_t name, std::vector<AttributeMetadata> attributes) override {
		// TODO: ALTER TABLE / add columns on duplicate relation name
		// TODO: optional transformation to upper/lower case
		// TODO: custom primary key or other column properties
		// TODO: custom table properties
		// TODO: custom SQL script before/after stream/relation/record
		// TODO: comments and/or custom comments + record count of each table as a comment
		// TODO: optional transactions: BEGIN/COMMIT/ROLLBACK for stream/relation/record
		// TODO: optional wrapping at certain width (like 80 characters)?
		// TODO: optional syntax highlighting?
		// TODO: share code/behavior with relpipe-tr-sql (but it uses parametrized statements)

		const bool hasPreviousRelation = currentRelation.size() > 0;
		if (hasPreviousRelation) {
			endRelation();
			// separating empty line; currentRelation still holds the previous name here, so its options decide
			if (getWriteDDL() || getWriteDML()) output << std::endl;
		}

		currentRelation = name;
		currentAttributes = attributes;
		recordCount = 0;
		valueCount = 0;

		if (!getWriteDDL()) return;

		output << "CREATE TABLE ";
		writeIdentifier(output, convertor.to_bytes(currentRelation));
		output << " (" << std::endl;

		const size_t attributeCount = attributes.size();
		for (size_t index = 0; index < attributeCount; index++) {
			output << "\t";
			writeIdentifier(output, convertor.to_bytes(attributes[index].getAttributeName()));
			// TODO: implement RelationalReaderValueHandler
			output << " ";
			writeType(output, convertor.to_bytes(findSQLType(attributes[index])));
			const bool isLast = index + 1 == attributeCount;
			if (!isLast) output << ",";
			output << std::endl;
		}

		output << ");" << std::endl;
		if (getWriteDML()) output << std::endl;
	}

	void attribute(const string_t& value) override {
		if (getWriteDML() == false) return;

		if (valueCount % currentAttributes.size() == 0) {
			// TODO: optional use of function/procedure instead of INSERT
			// TODO: custom line-ends + indentation
			recordCount++;

			auto insertMode = getInsertMode();
			if (insertMode == Configuration::InsertMode::SINGLE) {
				output << "INSERT INTO ";
				writeIdentifier(output, convertor.to_bytes(currentRelation));

				if (getWriteColumnNames()) {
					output << " (";
					for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
						writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
						if (i < (limit - 1)) output << ", ";
					}
					output << ")";
				}

				output << " VALUES (";
			} else if (insertMode == Configuration::InsertMode::MULTI) {
				if (recordCount == 1) {
					output << "INSERT INTO ";
					writeIdentifier(output, convertor.to_bytes(currentRelation));

					if (getWriteColumnNames()) {
						output << "\n\t(";
						for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
							writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
							if (i < (limit - 1)) output << ", ";
						}
						output << ")" << std::endl;
					} else {
						output << " ";
					}

					output << "VALUES" << std::endl;
				} else {
					output << "," << std::endl;
				}
				output << "\t(";
			} else {
				throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
			}
		}

		valueCount++;

		if (value.size() > 0) {
			// TODO: support all data types + implement RelationalReaderValueHandler
			writeValue(output, convertor.to_bytes(value));
		} else {
			// TODO: support actual nulls when supported in the relpipe data format + just optional conversion from empty strings to NULLs
			output << "NULL";
		}

		if (valueCount % currentAttributes.size()) {
			output << ", ";
		} else {
			auto insertMode = getInsertMode();
			if (insertMode == Configuration::InsertMode::SINGLE) {
				output << ");" << std::endl;
			} else if (insertMode == Configuration::InsertMode::MULTI) {
				output << ")";
			} else {
				throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
			}
			valueCount = 0;
		}
	}

	/**
	 * Called at the end of the input: finishes the last relation (if there was any) and flushes the output stream.
	 */
	void endOfPipe() {
		const bool relationOpen = currentRelation.size() > 0;
		if (relationOpen) endRelation();
		output.flush();
	}

};

}
}
}