// Configuration options: --write-ddl, --write-dml, --write-column-names, --insert-mode, --type-cast
/**
* Relational pipes
* Copyright © 2022 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include <locale>
#include <codecvt>
#include <regex>
#include <relpipe/reader/typedefs.h>
#include <relpipe/reader/TypeId.h>
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
#include <relpipe/reader/handlers/AttributeMetadata.h>
#include "Configuration.h"
#include "RelpipeSQLWriterException.h"
namespace relpipe {
namespace out {
namespace sql {
using namespace relpipe;
using namespace relpipe::reader;
using namespace relpipe::reader::handlers;
class SQLHandler : public RelationalReaderStringHandler {
private:
std::ostream& output;
Configuration& configuration;
std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // generate SQL always in UTF-8
std::vector<AttributeMetadata> currentAttributes;
integer_t valueCount = 0;
integer_t recordCount = 0;
string_t currentRelation;
/**
* @param a
* @param b
* @return true if relations have same number and types of attributes (names may differ)
*/
bool matches(const std::vector<AttributeMetadata>& a, const std::vector<AttributeMetadata>& b) {
if (a.size() != b.size()) return false;
for (int i = 0, limit = a.size(); i < limit; i++) if (a[i].getTypeId() != b[i].getTypeId()) return false;
return true;
}
static void writeIdentifier(std::ostream& output, std::string identifier) {
output << '"';
for (auto & ch : identifier) {
if (ch == '"') output << "\"\"";
else output << ch;
}
output << '"';
}
static void writeType(std::ostream& output, std::string type) {
if (std::regex_match(type, std::regex("[a-z0-9]+(\\([0-9]+(,\\s*[0-9]+)*\\))?"))) output << type;
else writeIdentifier(output, type);
}
static void writeValue(std::ostream& output, std::string value) {
output << '\'';
for (auto & ch : value) {
if (ch == '\'') output << "''";
else output << ch;
}
output << '\'';
}
void writeRecordCount() {
// currently disabled due to relpipe-in-sql parser issues with last comment without any following expression
// output << "-- Record count: " << recordCount << std::endl;
}
void endRelation() {
if (getWriteDML()) {
if (getInsertMode() == Configuration::InsertMode::MULTI) {
output << std::endl << ";" << std::endl;
}
writeRecordCount();
}
}
// TODO: code deduplication
Configuration::InsertMode getInsertMode() {
for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
if (std::regex_match(currentRelation, rc.relationPattern)) {
if (rc.insertMode != Configuration::InsertMode::DEFAULT) return rc.insertMode;
}
}
return Configuration::InsertMode::MULTI;
}
// TODO: code deduplication
bool getWriteDDL() {
for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDDL;
}
return true;
}
// TODO: code deduplication
bool getWriteDML() {
for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDML;
}
return true;
}
// TODO: code deduplication
bool getWriteColumnNames() {
for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeColumnNames;
}
return true;
}
string_t findSQLType(const AttributeMetadata attribute) {
// Type casts provided by the user:
for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
if (std::regex_match(currentRelation, rc.relationPattern)) {
for (const Configuration::TypeCastRule tcr : rc.typeCastRules) {
if (std::regex_match(attribute.getAttributeName(), tcr.attribute) && std::regex_match(attribute.getTypeName(), tcr.type)) return tcr.sqlType;
}
}
}
// Default mapping:
if (attribute.getTypeId() == reader::TypeId::BOOLEAN) return L"integer";
else if (attribute.getTypeId() == reader::TypeId::INTEGER) return L"bigint";
else if (attribute.getTypeId() == reader::TypeId::STRING) return L"text";
else throw RelpipeSQLWriterException(L"Type not yet supported in the SQL output: " + attribute.getTypeName());
}
public:
SQLHandler(std::ostream& output, Configuration& configuration) : output(output), configuration(configuration) {
}
void startRelation(string_t name, std::vector<AttributeMetadata> attributes) override {
// TODO: ALTER TABLE / add columns on duplicate relation name
// TODO: optional transformation to upper/lower case
// TODO: custom primary key or other column properties
// TODO: custom table properties
// TODO: custom SQL script before/after stream/relation/record
// TODO: comments and/or custom comments + record count of each table as a comment
// TODO: optional transactions: BEGIN/COMMIT/ROLLBACK for stream/relation/record
// TODO: optional wrapping at certain width (like 80 characters)?
// TODO: optional syntax highlighting?
// TODO: share code/behavior with relpipe-tr-sql (but it uses parametrized statements)
if (currentRelation.size()) {
endRelation();
if (getWriteDDL() || getWriteDML()) output << std::endl;
}
currentRelation = name;
currentAttributes = attributes;
recordCount = 0;
valueCount = 0;
if (getWriteDDL()) {
output << "CREATE TABLE ";
writeIdentifier(output, convertor.to_bytes(currentRelation));
output << " (" << std::endl;
for (size_t i = 0, limit = attributes.size(); i < limit; i++) {
auto attribute = attributes[i];
output << "\t";
writeIdentifier(output, convertor.to_bytes(attribute.getAttributeName()));
// TODO: implement RelationalReaderValueHandler
output << " ";
writeType(output, convertor.to_bytes(findSQLType(attribute)));
if (i < (limit - 1)) output << ",";
output << std::endl;
}
output << ");" << std::endl;
if (getWriteDML()) output << std::endl;
}
}
void attribute(const string_t& value) override {
if (getWriteDML() == false) return;
if (valueCount % currentAttributes.size() == 0) {
// TODO: optional use of function/procedure instead of INSERT
// TODO: custom line-ends + indentation
recordCount++;
auto insertMode = getInsertMode();
if (insertMode == Configuration::InsertMode::SINGLE) {
output << "INSERT INTO ";
writeIdentifier(output, convertor.to_bytes(currentRelation));
if (getWriteColumnNames()) {
output << " (";
for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
if (i < (limit - 1)) output << ", ";
}
output << ")";
}
output << " VALUES (";
} else if (insertMode == Configuration::InsertMode::MULTI) {
if (recordCount == 1) {
output << "INSERT INTO ";
writeIdentifier(output, convertor.to_bytes(currentRelation));
if (getWriteColumnNames()) {
output << "\n\t(";
for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
if (i < (limit - 1)) output << ", ";
}
output << ")" << std::endl;
} else {
output << " ";
}
output << "VALUES" << std::endl;
} else {
output << "," << std::endl;
}
output << "\t(";
} else {
throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
}
}
valueCount++;
if (value.size() > 0) {
// TODO: support all data types + implement RelationalReaderValueHandler
writeValue(output, convertor.to_bytes(value));
} else {
// TODO: support actual nulls when supported in the relpipe data format + just optional conversion from empty strings to NULLs
output << "NULL";
}
if (valueCount % currentAttributes.size()) {
output << ", ";
} else {
auto insertMode = getInsertMode();
if (insertMode == Configuration::InsertMode::SINGLE) {
output << ");" << std::endl;
} else if (insertMode == Configuration::InsertMode::MULTI) {
output << ")";
} else {
throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
}
valueCount = 0;
}
}
void endOfPipe() {
if (currentRelation.size()) {
endRelation();
}
output.flush();
}
};
}
}
}