configuration: --write-ddl, --write-dml, --write-column-names, --insert-mode, --type-cast
--- a/bash-completion.sh Sat Dec 10 20:41:40 2022 +0100
+++ b/bash-completion.sh Tue Dec 13 02:07:00 2022 +0100
@@ -22,17 +22,50 @@
w2=${COMP_WORDS[COMP_CWORD-2]}
w3=${COMP_WORDS[COMP_CWORD-3]}
- WRITE_HEADER=(
+ BOOLEAN_VALUES=( # candidate values offered after --write-ddl, --write-dml and --write-column-names
"true"
"false"
)
- if [[ "$w1" == "--write-header" ]]; then COMPREPLY=($(compgen -W "${WRITE_HEADER[*]}" -- "$w0"))
- elif [[ "$w1" == "--write-types" ]]; then COMPREPLY=($(compgen -W "${WRITE_HEADER[*]}" -- "$w0"))
+ INSERT_MODES=( # candidate values offered after --insert-mode
+ "single"
+ "multi"
+ )
+
+ RELPIPE_TYPES=( # candidates offered when --type-cast is two words before the word being completed
+ "boolean"
+ "integer"
+ "string"
+ )
+
+ SQL_TYPES=( # candidates offered when --type-cast is three words before the word being completed
+ "text"
+ "integer"
+ "bigint"
+ "numeric"
+ "varchar(20)" # NOTE(review): contains parentheses and is offered unquoted — confirm the completed word is usable without manual quoting
+ "bit"
+ "date"
+ "time"
+ "timestamp"
+ )
+
+ if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("'.*'") # only when $w0 is still empty: suggest a quoted match-all pattern
+ elif [[ "$w1" == "--write-ddl" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) # true/false
+ elif [[ "$w1" == "--write-dml" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) # true/false
+ elif [[ "$w1" == "--write-column-names" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) # true/false
+ elif [[ "$w1" == "--insert-mode" ]]; then COMPREPLY=($(compgen -W "${INSERT_MODES[*]}" -- "$w0")) # single/multi
+ elif [[ "$w1" == "--type-cast" && "x$w0" == "x" ]]; then COMPREPLY=("'.*'") # only when $w0 is still empty: suggest a quoted match-all pattern
+ elif [[ "$w2" == "--type-cast" ]]; then COMPREPLY=($(compgen -W "${RELPIPE_TYPES[*]}" -- "$w0")) # --type-cast is two words back: offer the relpipe type names
+ elif [[ "$w3" == "--type-cast" ]]; then COMPREPLY=($(compgen -W "${SQL_TYPES[*]}" -- "$w0")) # --type-cast is three words back: offer the SQL type names
else
OPTIONS=(
- "--write-header"
- "--write-types"
+ "--relation" # the option names listed here are the ones CLIParser defines as OPTION_* constants
+ "--write-ddl"
+ "--write-dml"
+ "--write-column-names"
+ "--insert-mode"
+ "--type-cast"
)
COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0"))
fi
--- a/src/CLIParser.h Sat Dec 10 20:41:40 2022 +0100
+++ b/src/CLIParser.h Tue Dec 13 02:07:00 2022 +0100
@@ -46,23 +46,65 @@
else throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
}
+ Configuration::InsertMode parseInsertMode(const relpipe::reader::string_t& value) { // maps the textual --insert-mode argument to Configuration::InsertMode
+ if (value == L"single") return Configuration::InsertMode::SINGLE;
+ else if (value == L"multi") return Configuration::InsertMode::MULTI;
+ else throw relpipe::cli::RelpipeCLIException(L"Unable to parse InsertMode value: " + value + L" (expecting single or multi)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS); // exact comparison above: any other text is rejected
+ }
+
+ void updateRelationName(Configuration::RelationConfiguration& currentRelation) { // gives the configuration a relation pattern if it does not have one yet
+ if (currentRelation.relation.size() == 0) currentRelation.relation = L".*"; // empty name becomes the match-all pattern, so addRelation() will not skip this configuration
+ }
+
+ void addRelation(Configuration& c, Configuration::RelationConfiguration& currentRelation) { // appends the collected relation configuration to c (if it has a pattern) and resets it
+ if (currentRelation.relation.size()) { // a configuration with an empty relation text is silently skipped
+ currentRelation.relationPattern = std::wregex(currentRelation.relation); // compile the pattern text here; NOTE(review): an invalid pattern presumably surfaces as std::regex_error — confirm how the caller reports it
+ c.relationConfigurations.push_back(currentRelation); // stored as a copy
+ currentRelation = Configuration::RelationConfiguration(); // back to default values for the next relation
+ }
+ }
+
public:
- static const relpipe::reader::string_t OPTION_WRITE_HEADER;
- static const relpipe::reader::string_t OPTION_WRITE_TYPES;
+ static const relpipe::reader::string_t OPTION_RELATION; // option names compared against the arguments in parse(); the values are defined after the class body
+ static const relpipe::reader::string_t OPTION_WRITE_DDL;
+ static const relpipe::reader::string_t OPTION_WRITE_DML;
+ static const relpipe::reader::string_t OPTION_WRITE_COLUMN_NAMES;
+ static const relpipe::reader::string_t OPTION_INSERT_MODE;
+ static const relpipe::reader::string_t OPTION_TYPE_CAST;
Configuration parse(const std::vector<relpipe::reader::string_t>& arguments) {
Configuration c;
+ Configuration::RelationConfiguration currentRelation; // options are collected here until the next --relation or the end of the arguments
for (int i = 0; i < arguments.size();) {
relpipe::reader::string_t option = readNext(arguments, i);
- if (option == OPTION_WRITE_HEADER) {
- c.writeHeader = parseBoolean(readNext(arguments, i));
- } else if (option == OPTION_WRITE_TYPES) {
- c.writeTypes = parseBoolean(readNext(arguments, i));
+ if (option == OPTION_RELATION) { // begins a new per-relation configuration
+ addRelation(c, currentRelation); // previous relation
+ currentRelation.relation = readNext(arguments, i); // pattern text; compiled to a regex later, in addRelation()
+ } else if (option == OPTION_WRITE_DDL) {
+ updateRelationName(currentRelation); // if no relation pattern has been set yet, the option applies to the match-all pattern
+ currentRelation.writeDDL = parseBoolean(readNext(arguments, i));
+ } else if (option == OPTION_WRITE_DML) {
+ updateRelationName(currentRelation); // same fallback to the match-all pattern
+ currentRelation.writeDML = parseBoolean(readNext(arguments, i));
+ } else if (option == OPTION_WRITE_COLUMN_NAMES) {
+ updateRelationName(currentRelation); // same fallback to the match-all pattern
+ currentRelation.writeColumnNames = parseBoolean(readNext(arguments, i));
+ } else if (option == OPTION_INSERT_MODE) {
+ updateRelationName(currentRelation); // same fallback to the match-all pattern
+ currentRelation.insertMode = parseInsertMode(readNext(arguments, i));
+ } else if (option == OPTION_TYPE_CAST) { // consumes three arguments
+ updateRelationName(currentRelation); // same fallback to the match-all pattern
+ Configuration::TypeCastRule tcr;
+ tcr.attribute = std::wregex(readNext(arguments, i)); // 1st argument: regex later matched against the attribute name
+ tcr.type = std::wregex(readNext(arguments, i)); // 2nd argument: regex later matched against the attribute's type name
+ tcr.sqlType = readNext(arguments, i); // 3rd argument: SQL type text, stored as given
+ currentRelation.typeCastRules.push_back(tcr); // rules keep the order in which they were given
} else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
}
+ addRelation(c, currentRelation); // last relation
return c;
}
@@ -71,8 +113,12 @@
}
};
-const relpipe::reader::string_t CLIParser::OPTION_WRITE_HEADER = L"--write-header";
-const relpipe::reader::string_t CLIParser::OPTION_WRITE_TYPES = L"--write-types";
+const relpipe::common::type::StringX CLIParser::OPTION_RELATION = L"--relation"; // NOTE(review): declared in the class as relpipe::reader::string_t but defined as relpipe::common::type::StringX — presumably both alias the same type; confirm
+const relpipe::common::type::StringX CLIParser::OPTION_WRITE_DDL = L"--write-ddl";
+const relpipe::common::type::StringX CLIParser::OPTION_WRITE_DML = L"--write-dml";
+const relpipe::common::type::StringX CLIParser::OPTION_WRITE_COLUMN_NAMES = L"--write-column-names";
+const relpipe::common::type::StringX CLIParser::OPTION_INSERT_MODE = L"--insert-mode";
+const relpipe::common::type::StringX CLIParser::OPTION_TYPE_CAST = L"--type-cast";
}
}
--- a/src/Configuration.h Sat Dec 10 20:41:40 2022 +0100
+++ b/src/Configuration.h Tue Dec 13 02:07:00 2022 +0100
@@ -18,7 +18,9 @@
#include <vector>
#include <iostream>
+#include <regex>
+#include <relpipe/common/type/typedefs.h>
#include <relpipe/reader/typedefs.h>
@@ -30,13 +32,31 @@
public:
enum class InsertMode {
+ DEFAULT, // initial value of RelationConfiguration::insertMode, i.e. no --insert-mode was parsed for that relation
SINGLE,
MULTI,
};
- relpipe::reader::boolean_t writeHeader = true;
- relpipe::reader::boolean_t writeTypes = false;
- InsertMode insertMode = InsertMode::MULTI; // TODO: per-relation mode
+ class TypeCastRule { // one --type-cast rule: two patterns plus the SQL type assigned when both match
+ public:
+
+ std::wregex attribute = std::wregex(L".*"); // pattern for the attribute name; defaults to match-all
+ std::wregex type = std::wregex(L".*"); // pattern for the attribute's type name; defaults to match-all
+ relpipe::common::type::StringX sqlType; // SQL type text; empty unless set by the parser
+ };
+
+ class RelationConfiguration { // settings that apply to relations whose name matches relationPattern
+ public:
+ relpipe::common::type::StringX relation; // pattern text as given on the command line
+ std::wregex relationPattern; // compiled form of `relation`; assigned by the CLI parser when the configuration is stored
+ relpipe::common::type::Boolean writeDDL = true; // default: enabled
+ relpipe::common::type::Boolean writeDML = true; // default: enabled
+ relpipe::common::type::Boolean writeColumnNames = true; // default: enabled
+ std::vector<TypeCastRule> typeCastRules; // in the order they were added
+ InsertMode insertMode = InsertMode::DEFAULT; // DEFAULT until an explicit mode is assigned
+ };
+
+ std::vector<RelationConfiguration> relationConfigurations; // per-relation settings; replaces the former global writeHeader/writeTypes/insertMode members
virtual ~Configuration() {
}
--- a/src/SQLHandler.h Sat Dec 10 20:41:40 2022 +0100
+++ b/src/SQLHandler.h Tue Dec 13 02:07:00 2022 +0100
@@ -48,7 +48,7 @@
std::vector<AttributeMetadata> currentAttributes;
integer_t valueCount = 0;
integer_t recordCount = 0;
- string_t currentTable;
+ string_t currentRelation;
/**
* @param a
@@ -70,6 +70,11 @@
output << '"';
}
+ static void writeType(std::ostream& output, std::string type) { // writes an SQL type: verbatim if it has a simple shape, otherwise via writeIdentifier()
+ if (std::regex_match(type, std::regex("[a-z0-9]+(\\([0-9]+(,\\s*[0-9]+)*\\))?"))) output << type; // simple shape = lower-case letters/digits, optionally followed by parenthesized comma-separated numbers, e.g. varchar(20)
+ else writeIdentifier(output, type); // everything else (upper case, spaces, other characters) is not written verbatim
+ }
+
static void writeValue(std::ostream& output, std::string value) {
output << '\'';
for (auto & ch : value) {
@@ -85,12 +90,68 @@
}
void endRelation() {
+ if (getWriteDML()) { // attribute() writes nothing for relations with DML disabled, so there is nothing to close or count
+ if (getInsertMode() == Configuration::InsertMode::MULTI) { // in MULTI mode attribute() ends each record with ")" only, so the statement terminator is written here
+ output << std::endl << ";" << std::endl;
+ }
- if (configuration.insertMode == Configuration::InsertMode::MULTI) {
- output << std::endl << ";" << std::endl;
+ writeRecordCount();
+ }
+ }
+
+ // TODO: code deduplication
+
+ Configuration::InsertMode getInsertMode() { // insert mode for currentRelation: the first matching configuration with an explicit mode wins
+ for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+ if (std::regex_match(currentRelation, rc.relationPattern)) { // regex_match: the whole relation name has to match the pattern
+ if (rc.insertMode != Configuration::InsertMode::DEFAULT) return rc.insertMode; // DEFAULT means not specified: keep searching the following configurations
+ }
+ }
+ return Configuration::InsertMode::MULTI; // fallback when no matching configuration sets a mode
+ }
+
+ // TODO: code deduplication
+
+ bool getWriteDDL() { // whether DDL is written for currentRelation
+ for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+ if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDDL; // unlike getInsertMode(), the first matching configuration always decides; later matches are not consulted
+ }
+ return true; // no configuration matches: DDL is written
+ }
+
+ // TODO: code deduplication
+
+ bool getWriteDML() { // whether DML is written for currentRelation; NOTE(review): attribute() calls this for every value, so the regex matching below is repeated per value
+ for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+ if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDML; // the first matching configuration decides
+ }
+ return true; // no configuration matches: DML is written
+ }
+
+ // TODO: code deduplication
+
+ bool getWriteColumnNames() { // whether the INSERT statements for currentRelation include the column name list
+ for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+ if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeColumnNames; // the first matching configuration decides
+ }
+ return true; // no configuration matches: column names are written
+ }
+
+ string_t findSQLType(const AttributeMetadata& attribute) { // resolves the SQL column type: user-supplied type-cast rules first, then the built-in mapping; the attribute is taken by const reference to avoid a copy per call
+ // Type casts provided by the user:
+ for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+ if (std::regex_match(currentRelation, rc.relationPattern)) {
+ for (const Configuration::TypeCastRule& tcr : rc.typeCastRules) { // by reference: each rule holds two compiled regexes, which were previously copied on every iteration
+ if (std::regex_match(attribute.getAttributeName(), tcr.attribute) && std::regex_match(attribute.getTypeName(), tcr.type)) return tcr.sqlType; // the first rule matching both the attribute name and the type name wins
+ }
+ }
}
- writeRecordCount();
+ // Default mapping:
+ if (attribute.getTypeId() == reader::TypeId::BOOLEAN) return L"integer";
+ else if (attribute.getTypeId() == reader::TypeId::INTEGER) return L"bigint";
+ else if (attribute.getTypeId() == reader::TypeId::STRING) return L"text";
+ else throw RelpipeSQLWriterException(L"Type not yet supported in the SQL output: " + attribute.getTypeName()); // any other TypeId has no default mapping
}
public:
@@ -100,9 +161,7 @@
void startRelation(string_t name, std::vector<AttributeMetadata> attributes) override {
// TODO: ALTER TABLE / add columns on duplicate relation name
- // TODO: optionally omit CREATE/ALTER table (just INSERT)
// TODO: optional transformation to upper/lower case
- // TODO: custom data type mapping
// TODO: custom primary key or other column properties
// TODO: custom table properties
// TODO: custom SQL script before/after stream/relation/record
@@ -112,82 +171,82 @@
// TODO: optional syntax highlighting?
// TODO: share code/behavior with relpipe-tr-sql (but it uses parametrized statements)
- if (currentTable.size()) {
+ if (currentRelation.size()) {
endRelation();
- output << std::endl;
+ if (getWriteDDL() || getWriteDML()) output << std::endl;
}
- currentTable = name;
+ currentRelation = name;
currentAttributes = attributes;
recordCount = 0;
valueCount = 0;
- output << "CREATE TABLE ";
- writeIdentifier(output, convertor.to_bytes(currentTable));
- output << " (" << std::endl;
- for (size_t i = 0, limit = attributes.size(); i < limit; i++) {
- auto attribute = attributes[i];
- output << "\t";
- writeIdentifier(output, convertor.to_bytes(attribute.getAttributeName()));
- // TODO: support all data types + implement RelationalReaderValueHandler
- output << " TEXT";
- if (i < (limit - 1)) output << ",";
- output << std::endl;
+ if (getWriteDDL()) {
+ output << "CREATE TABLE ";
+ writeIdentifier(output, convertor.to_bytes(currentRelation));
+ output << " (" << std::endl;
+ for (size_t i = 0, limit = attributes.size(); i < limit; i++) {
+ auto attribute = attributes[i];
+ output << "\t";
+ writeIdentifier(output, convertor.to_bytes(attribute.getAttributeName()));
+ // TODO: implement RelationalReaderValueHandler
+ output << " ";
+ writeType(output, convertor.to_bytes(findSQLType(attribute)));
+ if (i < (limit - 1)) output << ",";
+ output << std::endl;
- }
- output << ");" << std::endl << std::endl;
-
- if (currentAttributes.empty()) {
- //if (configuration.writeHeader) for (auto attr : attributes) attribute(configuration.writeTypes ? attr.getAttributeName() + L"::" + attr.getTypeName() : attr.getAttributeName());
- } else if (matches(currentAttributes, attributes)) {
- // do UNION ALL – just append the records
- } else {
- // throw RelpipeSQLWriterException(L"To the SQL format we can convert only one relation or multiple relations that have same number of attributes of same types (relation and attribute names may differ – result is named after the first one).");
+ }
+ output << ");" << std::endl;
+ if (getWriteDML()) output << std::endl;
}
}
void attribute(const string_t& value) override {
+ if (getWriteDML() == false) return;
if (valueCount % currentAttributes.size() == 0) {
// TODO: optional use of function/procedure instead of INSERT
- // TODO: optional INSERT of multiple records
// TODO: custom line-ends + indentation
- // TODO: optionally write also the column names
recordCount++;
- if (configuration.insertMode == Configuration::InsertMode::SINGLE) {
+ auto insertMode = getInsertMode();
+ if (insertMode == Configuration::InsertMode::SINGLE) {
output << "INSERT INTO ";
- writeIdentifier(output, convertor.to_bytes(currentTable));
+ writeIdentifier(output, convertor.to_bytes(currentRelation));
- output << " (";
- for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
- writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
- if (i < (limit - 1)) output << ", ";
- }
- output << ")";
-
- output << " VALUES (";
- } else if (configuration.insertMode == Configuration::InsertMode::MULTI) {
- if (recordCount == 1) {
- // --------
- output << "INSERT INTO ";
- writeIdentifier(output, convertor.to_bytes(currentTable));
-
- output << "\n\t(";
+ if (getWriteColumnNames()) {
+ output << " (";
for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
if (i < (limit - 1)) output << ", ";
}
output << ")";
- // --------
+ }
+
+ output << " VALUES (";
+ } else if (insertMode == Configuration::InsertMode::MULTI) {
+ if (recordCount == 1) {
+ output << "INSERT INTO ";
+ writeIdentifier(output, convertor.to_bytes(currentRelation));
- output << std::endl << "VALUES" << std::endl;
+ if (getWriteColumnNames()) {
+ output << "\n\t(";
+ for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
+ writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
+ if (i < (limit - 1)) output << ", ";
+ }
+ output << ")" << std::endl;
+ } else {
+ output << " ";
+ }
+
+ output << "VALUES" << std::endl;
} else {
output << "," << std::endl;
}
output << "\t(";
} else {
- throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) configuration.insertMode));
+ throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
}
}
@@ -204,19 +263,20 @@
if (valueCount % currentAttributes.size()) {
output << ", ";
} else {
- if (configuration.insertMode == Configuration::InsertMode::SINGLE) {
+ auto insertMode = getInsertMode();
+ if (insertMode == Configuration::InsertMode::SINGLE) {
output << ");" << std::endl;
- } else if (configuration.insertMode == Configuration::InsertMode::MULTI) {
+ } else if (insertMode == Configuration::InsertMode::MULTI) {
output << ")";
} else {
- throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) configuration.insertMode));
+ throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
}
valueCount = 0;
}
}
void endOfPipe() {
- if (currentTable.size()) {
+ if (currentRelation.size()) {
endRelation();
}
output.flush();