# HG changeset patch # User František Kučera # Date 1670893620 -3600 # Node ID 202ce847990c85cd8f59e7f8f836b205d7a76dba # Parent 8a30971d285f934102801024144cf1a3f05e5a5a configuration: --write-ddl, --write-dml, --write-column-names, --insert-mode, --type-cast diff -r 8a30971d285f -r 202ce847990c bash-completion.sh --- a/bash-completion.sh Sat Dec 10 20:41:40 2022 +0100 +++ b/bash-completion.sh Tue Dec 13 02:07:00 2022 +0100 @@ -22,17 +22,50 @@ w2=${COMP_WORDS[COMP_CWORD-2]} w3=${COMP_WORDS[COMP_CWORD-3]} - WRITE_HEADER=( + BOOLEAN_VALUES=( "true" "false" ) - if [[ "$w1" == "--write-header" ]]; then COMPREPLY=($(compgen -W "${WRITE_HEADER[*]}" -- "$w0")) - elif [[ "$w1" == "--write-types" ]]; then COMPREPLY=($(compgen -W "${WRITE_HEADER[*]}" -- "$w0")) + INSERT_MODES=( + "single" + "multi" + ) + + RELPIPE_TYPES=( + "boolean" + "integer" + "string" + ) + + SQL_TYPES=( + "text" + "integer" + "bigint" + "numeric" + "varchar(20)" + "bit" + "date" + "time" + "timestamp" + ) + + if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("'.*'") + elif [[ "$w1" == "--write-ddl" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w1" == "--write-dml" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w1" == "--write-column-names" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w1" == "--insert-mode" ]]; then COMPREPLY=($(compgen -W "${INSERT_MODES[*]}" -- "$w0")) + elif [[ "$w1" == "--type-cast" && "x$w0" == "x" ]]; then COMPREPLY=("'.*'") + elif [[ "$w2" == "--type-cast" ]]; then COMPREPLY=($(compgen -W "${RELPIPE_TYPES[*]}" -- "$w0")) + elif [[ "$w3" == "--type-cast" ]]; then COMPREPLY=($(compgen -W "${SQL_TYPES[*]}" -- "$w0")) else OPTIONS=( - "--write-header" - "--write-types" + "--relation" + "--write-ddl" + "--write-dml" + "--write-column-names" + "--insert-mode" + "--type-cast" ) COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0")) fi diff -r 8a30971d285f -r 202ce847990c src/CLIParser.h --- a/src/CLIParser.h Sat Dec 10 20:41:40 2022 +0100 +++ b/src/CLIParser.h Tue Dec 13 02:07:00 2022 +0100 @@ -46,23 +46,65 @@ else throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS); } + Configuration::InsertMode parseInsertMode(const relpipe::reader::string_t& value) { + if (value == L"single") return Configuration::InsertMode::SINGLE; + else if (value == L"multi") return Configuration::InsertMode::MULTI; + else throw relpipe::cli::RelpipeCLIException(L"Unable to parse InsertMode value: " + value + L" (expecting single or multi)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS); + } + + void updateRelationName(Configuration::RelationConfiguration& currentRelation) { + if (currentRelation.relation.size() == 0) currentRelation.relation = L".*"; + } + + void addRelation(Configuration& c, Configuration::RelationConfiguration& currentRelation) { + if (currentRelation.relation.size()) { + currentRelation.relationPattern = std::wregex(currentRelation.relation); + c.relationConfigurations.push_back(currentRelation); + currentRelation = Configuration::RelationConfiguration(); + } + } + public: - static const relpipe::reader::string_t OPTION_WRITE_HEADER; - static const relpipe::reader::string_t OPTION_WRITE_TYPES; + static const relpipe::reader::string_t OPTION_RELATION; + static const relpipe::reader::string_t OPTION_WRITE_DDL; + static const relpipe::reader::string_t OPTION_WRITE_DML; + static const relpipe::reader::string_t OPTION_WRITE_COLUMN_NAMES; + static const relpipe::reader::string_t OPTION_INSERT_MODE; + static const relpipe::reader::string_t OPTION_TYPE_CAST; Configuration parse(const std::vector& arguments) { Configuration c; + Configuration::RelationConfiguration currentRelation; for (int i = 0; i < arguments.size();) { relpipe::reader::string_t option = readNext(arguments, i); - if (option == OPTION_WRITE_HEADER) { - c.writeHeader = parseBoolean(readNext(arguments, i)); - } else if (option == OPTION_WRITE_TYPES) { - c.writeTypes = parseBoolean(readNext(arguments, i)); + if (option == OPTION_RELATION) { + addRelation(c, currentRelation); // previous relation + currentRelation.relation = readNext(arguments, i); + } else if (option == OPTION_WRITE_DDL) { + updateRelationName(currentRelation); + currentRelation.writeDDL = parseBoolean(readNext(arguments, i)); + } else if (option == OPTION_WRITE_DML) { + updateRelationName(currentRelation); + currentRelation.writeDML = parseBoolean(readNext(arguments, i)); + } else if (option == OPTION_WRITE_COLUMN_NAMES) { + updateRelationName(currentRelation); + currentRelation.writeColumnNames = parseBoolean(readNext(arguments, i)); + } else if (option == OPTION_INSERT_MODE) { + updateRelationName(currentRelation); + currentRelation.insertMode = parseInsertMode(readNext(arguments, i)); + } else if (option == OPTION_TYPE_CAST) { + updateRelationName(currentRelation); + Configuration::TypeCastRule tcr; + tcr.attribute = std::wregex(readNext(arguments, i)); + tcr.type = std::wregex(readNext(arguments, i)); + tcr.sqlType = readNext(arguments, i); + currentRelation.typeCastRules.push_back(tcr); } else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS); } + addRelation(c, currentRelation); // last relation return c; } @@ -71,8 +113,12 @@ } }; -const relpipe::reader::string_t CLIParser::OPTION_WRITE_HEADER = L"--write-header"; -const relpipe::reader::string_t CLIParser::OPTION_WRITE_TYPES = L"--write-types"; +const relpipe::common::type::StringX CLIParser::OPTION_RELATION = L"--relation"; +const relpipe::common::type::StringX CLIParser::OPTION_WRITE_DDL = L"--write-ddl"; +const relpipe::common::type::StringX CLIParser::OPTION_WRITE_DML = L"--write-dml"; +const relpipe::common::type::StringX CLIParser::OPTION_WRITE_COLUMN_NAMES = L"--write-column-names"; +const relpipe::common::type::StringX CLIParser::OPTION_INSERT_MODE = L"--insert-mode"; +const relpipe::common::type::StringX CLIParser::OPTION_TYPE_CAST = L"--type-cast"; } } diff -r 8a30971d285f -r 202ce847990c src/Configuration.h --- a/src/Configuration.h Sat Dec 10 20:41:40 2022 +0100 +++ b/src/Configuration.h Tue Dec 13 02:07:00 2022 +0100 @@ -18,7 +18,9 @@ #include #include +#include +#include #include @@ -30,13 +32,31 @@ public: enum class InsertMode { + DEFAULT, SINGLE, MULTI, }; - relpipe::reader::boolean_t writeHeader = true; - relpipe::reader::boolean_t writeTypes = false; - InsertMode insertMode = InsertMode::MULTI; // TODO: per-relation mode + class TypeCastRule { + public: + + std::wregex attribute = std::wregex(L".*"); + std::wregex type = std::wregex(L".*"); + relpipe::common::type::StringX sqlType; + }; + + class RelationConfiguration { + public: + relpipe::common::type::StringX relation; + std::wregex relationPattern; + relpipe::common::type::Boolean writeDDL = true; + relpipe::common::type::Boolean writeDML = true; + relpipe::common::type::Boolean writeColumnNames = true; + std::vector typeCastRules; + InsertMode insertMode = InsertMode::DEFAULT; + }; + + std::vector relationConfigurations; virtual ~Configuration() { } diff -r 8a30971d285f -r 202ce847990c src/SQLHandler.h --- a/src/SQLHandler.h Sat Dec 10 20:41:40 2022 +0100 +++ b/src/SQLHandler.h Tue Dec 13 02:07:00 2022 +0100 @@ -48,7 +48,7 @@ std::vector currentAttributes; integer_t valueCount = 0; integer_t recordCount = 0; - string_t currentTable; + string_t currentRelation; /** * @param a @@ -70,6 +70,11 @@ output << '"'; } + static void writeType(std::ostream& output, std::string type) { + if (std::regex_match(type, std::regex("[a-z0-9]+(\\([0-9]+(,\\s*[0-9]+)*\\))?"))) output << type; + else writeIdentifier(output, type); + } + static void writeValue(std::ostream& output, std::string value) { output << '\''; for (auto & ch : value) { @@ -85,12 +90,68 @@ } void endRelation() { + if (getWriteDML()) { + if (getInsertMode() == Configuration::InsertMode::MULTI) { + output << std::endl << ";" << std::endl; + } - if (configuration.insertMode == Configuration::InsertMode::MULTI) { - output << std::endl << ";" << std::endl; + writeRecordCount(); + } + } + + // TODO: code deduplication + + Configuration::InsertMode getInsertMode() { + for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) { + if (std::regex_match(currentRelation, rc.relationPattern)) { + if (rc.insertMode != Configuration::InsertMode::DEFAULT) return rc.insertMode; + } + } + return Configuration::InsertMode::MULTI; + } + + // TODO: code deduplication + + bool getWriteDDL() { + for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) { + if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDDL; + } + return true; + } + + // TODO: code deduplication + + bool getWriteDML() { + for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) { + if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDML; + } + return true; + } + + // TODO: code deduplication + + bool getWriteColumnNames() { + for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) { + if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeColumnNames; + } + return true; + } + + string_t findSQLType(const AttributeMetadata attribute) { + // Type casts provided by the user: + for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) { + if (std::regex_match(currentRelation, rc.relationPattern)) { + for (const Configuration::TypeCastRule tcr : rc.typeCastRules) { + if (std::regex_match(attribute.getAttributeName(), tcr.attribute) && std::regex_match(attribute.getTypeName(), tcr.type)) return tcr.sqlType; + } + } } - writeRecordCount(); + // Default mapping: + if (attribute.getTypeId() == reader::TypeId::BOOLEAN) return L"integer"; + else if (attribute.getTypeId() == reader::TypeId::INTEGER) return L"bigint"; + else if (attribute.getTypeId() == reader::TypeId::STRING) return L"text"; + else throw RelpipeSQLWriterException(L"Type not yet supported in the SQL output: " + attribute.getTypeName()); } public: @@ -100,9 +161,7 @@ void startRelation(string_t name, std::vector attributes) override { // TODO: ALTER TABLE / add columns on duplicate relation name - // TODO: optionally omit CREATE/ALTER table (just INSERT) // TODO: optional transformation to upper/lower case - // TODO: custom data type mapping // TODO: custom primary key or other column properties // TODO: custom table properties // TODO: custom SQL script before/after stream/relation/record @@ -112,82 +171,82 @@ // TODO: optional syntax highlighting? // TODO: share code/behavior with relpipe-tr-sql (but it uses parametrized statements) - if (currentTable.size()) { + if (currentRelation.size()) { endRelation(); - output << std::endl; + if (getWriteDDL() || getWriteDML()) output << std::endl; } - currentTable = name; + currentRelation = name; currentAttributes = attributes; recordCount = 0; valueCount = 0; - output << "CREATE TABLE "; - writeIdentifier(output, convertor.to_bytes(currentTable)); - output << " (" << std::endl; - for (size_t i = 0, limit = attributes.size(); i < limit; i++) { - auto attribute = attributes[i]; - output << "\t"; - writeIdentifier(output, convertor.to_bytes(attribute.getAttributeName())); - // TODO: support all data types + implement RelationalReaderValueHandler - output << " TEXT"; - if (i < (limit - 1)) output << ","; - output << std::endl; + if (getWriteDDL()) { + output << "CREATE TABLE "; + writeIdentifier(output, convertor.to_bytes(currentRelation)); + output << " (" << std::endl; + for (size_t i = 0, limit = attributes.size(); i < limit; i++) { + auto attribute = attributes[i]; + output << "\t"; + writeIdentifier(output, convertor.to_bytes(attribute.getAttributeName())); + // TODO: implement RelationalReaderValueHandler + output << " "; + writeType(output, convertor.to_bytes(findSQLType(attribute))); + if (i < (limit - 1)) output << ","; + output << std::endl; - } - output << ");" << std::endl << std::endl; - - if (currentAttributes.empty()) { - //if (configuration.writeHeader) for (auto attr : attributes) attribute(configuration.writeTypes ? attr.getAttributeName() + L"::" + attr.getTypeName() : attr.getAttributeName()); - } else if (matches(currentAttributes, attributes)) { - // do UNION ALL – just append the records - } else { - // throw RelpipeSQLWriterException(L"To the SQL format we can convert only one relation or multiple relations that have same number of attributes of same types (relation and attribute names may differ – result is named after the first one)."); + } + output << ");" << std::endl; + if (getWriteDML()) output << std::endl; } } void attribute(const string_t& value) override { + if (getWriteDML() == false) return; if (valueCount % currentAttributes.size() == 0) { // TODO: optional use of function/procedure instead of INSERT - // TODO: optional INSERT of multiple records // TODO: custom line-ends + indentation - // TODO: optionally write also the column names recordCount++; - if (configuration.insertMode == Configuration::InsertMode::SINGLE) { + auto insertMode = getInsertMode(); + if (insertMode == Configuration::InsertMode::SINGLE) { output << "INSERT INTO "; - writeIdentifier(output, convertor.to_bytes(currentTable)); + writeIdentifier(output, convertor.to_bytes(currentRelation)); - output << " ("; - for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) { - writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName())); - if (i < (limit - 1)) output << ", "; - } - output << ")"; - - output << " VALUES ("; - } else if (configuration.insertMode == Configuration::InsertMode::MULTI) { - if (recordCount == 1) { - // -------- - output << "INSERT INTO "; - writeIdentifier(output, convertor.to_bytes(currentTable)); - - output << "\n\t("; + if (getWriteColumnNames()) { + output << " ("; for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) { writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName())); if (i < (limit - 1)) output << ", "; } output << ")"; - // -------- + } + + output << " VALUES ("; + } else if (insertMode == Configuration::InsertMode::MULTI) { + if (recordCount == 1) { + output << "INSERT INTO "; + writeIdentifier(output, convertor.to_bytes(currentRelation)); - output << std::endl << "VALUES" << std::endl; + if (getWriteColumnNames()) { + output << "\n\t("; + for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) { + writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName())); + if (i < (limit - 1)) output << ", "; + } + output << ")" << std::endl; + } else { + output << " "; + } + + output << "VALUES" << std::endl; } else { output << "," << std::endl; } output << "\t("; } else { - throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) configuration.insertMode)); + throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode)); } } @@ -204,19 +263,20 @@ if (valueCount % currentAttributes.size()) { output << ", "; } else { - if (configuration.insertMode == Configuration::InsertMode::SINGLE) { + auto insertMode = getInsertMode(); + if (insertMode == Configuration::InsertMode::SINGLE) { output << ");" << std::endl; - } else if (configuration.insertMode == Configuration::InsertMode::MULTI) { + } else if (insertMode == Configuration::InsertMode::MULTI) { output << ")"; } else { - throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) configuration.insertMode)); + throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode)); } valueCount = 0; } } void endOfPipe() { - if (currentTable.size()) { + if (currentRelation.size()) { endRelation(); } output.flush();