configuration: --write-ddl, --write-dml, --write-column-names, --insert-mode, --type-cast v_0 tip
author František Kučera <franta-hg@frantovo.cz>
Tue, 13 Dec 2022 02:07:00 +0100
branch v_0
changeset 3 202ce847990c
parent 2 8a30971d285f
configuration: --write-ddl, --write-dml, --write-column-names, --insert-mode, --type-cast
bash-completion.sh
src/CLIParser.h
src/Configuration.h
src/SQLHandler.h
--- a/bash-completion.sh	Sat Dec 10 20:41:40 2022 +0100
+++ b/bash-completion.sh	Tue Dec 13 02:07:00 2022 +0100
@@ -22,17 +22,50 @@
 	w2=${COMP_WORDS[COMP_CWORD-2]}
 	w3=${COMP_WORDS[COMP_CWORD-3]}
 
-	WRITE_HEADER=(
+	BOOLEAN_VALUES=(
 		"true"
 		"false"
 	)
 
-	  if [[ "$w1" == "--write-header"                                   ]];    then COMPREPLY=($(compgen -W "${WRITE_HEADER[*]}" -- "$w0"))
-	elif [[ "$w1" == "--write-types"                                    ]];    then COMPREPLY=($(compgen -W "${WRITE_HEADER[*]}" -- "$w0"))
+	INSERT_MODES=(
+		"single"
+		"multi"
+	)
+
+	RELPIPE_TYPES=(
+		"boolean"
+		"integer"
+		"string"
+	)
+
+	SQL_TYPES=(
+		"text"
+		"integer"
+		"bigint"
+		"numeric"
+		"varchar(20)"
+		"bit"
+		"date"
+		"time"
+		"timestamp"
+	)
+
+	  if [[ "$w1" == "--relation"                      && "x$w0" == "x" ]];    then COMPREPLY=("'.*'")
+	elif [[ "$w1" == "--write-ddl"                                      ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+	elif [[ "$w1" == "--write-dml"                                      ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+	elif [[ "$w1" == "--write-column-names"                             ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+	elif [[ "$w1" == "--insert-mode"                                    ]];    then COMPREPLY=($(compgen -W "${INSERT_MODES[*]}" -- "$w0"))
+	elif [[ "$w1" == "--type-cast"                     && "x$w0" == "x" ]];    then COMPREPLY=("'.*'")
+	elif [[ "$w2" == "--type-cast"                                      ]];    then COMPREPLY=($(compgen -W "${RELPIPE_TYPES[*]}" -- "$w0"))
+	elif [[ "$w3" == "--type-cast"                                      ]];    then COMPREPLY=($(compgen -W "${SQL_TYPES[*]}" -- "$w0"))
 	else
 		OPTIONS=(
-			"--write-header"
-			"--write-types"
+			"--relation"
+			"--write-ddl"
+			"--write-dml"
+			"--write-column-names"
+			"--insert-mode"
+			"--type-cast"
 		)
 		COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0"))
 	fi
--- a/src/CLIParser.h	Sat Dec 10 20:41:40 2022 +0100
+++ b/src/CLIParser.h	Tue Dec 13 02:07:00 2022 +0100
@@ -46,23 +46,65 @@
 		else throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
 	}
 
+	Configuration::InsertMode parseInsertMode(const relpipe::reader::string_t& value) {
+		if (value == L"single") return Configuration::InsertMode::SINGLE;
+		else if (value == L"multi") return Configuration::InsertMode::MULTI;
+		else throw relpipe::cli::RelpipeCLIException(L"Unable to parse InsertMode value: " + value + L" (expecting single or multi)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
+	}
+
+	void updateRelationName(Configuration::RelationConfiguration& currentRelation) {
+		if (currentRelation.relation.size() == 0) currentRelation.relation = L".*";
+	}
+
+	void addRelation(Configuration& c, Configuration::RelationConfiguration& currentRelation) {
+		if (currentRelation.relation.size()) {
+			currentRelation.relationPattern = std::wregex(currentRelation.relation);
+			c.relationConfigurations.push_back(currentRelation);
+			currentRelation = Configuration::RelationConfiguration();
+		}
+	}
+
 public:
 
-	static const relpipe::reader::string_t OPTION_WRITE_HEADER;
-	static const relpipe::reader::string_t OPTION_WRITE_TYPES;
+	static const relpipe::reader::string_t OPTION_RELATION;
+	static const relpipe::reader::string_t OPTION_WRITE_DDL;
+	static const relpipe::reader::string_t OPTION_WRITE_DML;
+	static const relpipe::reader::string_t OPTION_WRITE_COLUMN_NAMES;
+	static const relpipe::reader::string_t OPTION_INSERT_MODE;
+	static const relpipe::reader::string_t OPTION_TYPE_CAST;
 
 	Configuration parse(const std::vector<relpipe::reader::string_t>& arguments) {
 		Configuration c;
+		Configuration::RelationConfiguration currentRelation;
 
 		for (int i = 0; i < arguments.size();) {
 			relpipe::reader::string_t option = readNext(arguments, i);
 
-			if (option == OPTION_WRITE_HEADER) {
-				c.writeHeader = parseBoolean(readNext(arguments, i));
-			} else if (option == OPTION_WRITE_TYPES) {
-				c.writeTypes = parseBoolean(readNext(arguments, i));
+			if (option == OPTION_RELATION) {
+				addRelation(c, currentRelation); // previous relation
+				currentRelation.relation = readNext(arguments, i);
+			} else if (option == OPTION_WRITE_DDL) {
+				updateRelationName(currentRelation);
+				currentRelation.writeDDL = parseBoolean(readNext(arguments, i));
+			} else if (option == OPTION_WRITE_DML) {
+				updateRelationName(currentRelation);
+				currentRelation.writeDML = parseBoolean(readNext(arguments, i));
+			} else if (option == OPTION_WRITE_COLUMN_NAMES) {
+				updateRelationName(currentRelation);
+				currentRelation.writeColumnNames = parseBoolean(readNext(arguments, i));
+			} else if (option == OPTION_INSERT_MODE) {
+				updateRelationName(currentRelation);
+				currentRelation.insertMode = parseInsertMode(readNext(arguments, i));
+			} else if (option == OPTION_TYPE_CAST) {
+				updateRelationName(currentRelation);
+				Configuration::TypeCastRule tcr;
+				tcr.attribute = std::wregex(readNext(arguments, i));
+				tcr.type = std::wregex(readNext(arguments, i));
+				tcr.sqlType = readNext(arguments, i);
+				currentRelation.typeCastRules.push_back(tcr);
 			} else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
 		}
+		addRelation(c, currentRelation); // last relation
 
 		return c;
 	}
@@ -71,8 +113,12 @@
 	}
 };
 
-const relpipe::reader::string_t CLIParser::OPTION_WRITE_HEADER = L"--write-header";
-const relpipe::reader::string_t CLIParser::OPTION_WRITE_TYPES = L"--write-types";
+const relpipe::common::type::StringX CLIParser::OPTION_RELATION = L"--relation";
+const relpipe::common::type::StringX CLIParser::OPTION_WRITE_DDL = L"--write-ddl";
+const relpipe::common::type::StringX CLIParser::OPTION_WRITE_DML = L"--write-dml";
+const relpipe::common::type::StringX CLIParser::OPTION_WRITE_COLUMN_NAMES = L"--write-column-names";
+const relpipe::common::type::StringX CLIParser::OPTION_INSERT_MODE = L"--insert-mode";
+const relpipe::common::type::StringX CLIParser::OPTION_TYPE_CAST = L"--type-cast";
 
 }
 }
--- a/src/Configuration.h	Sat Dec 10 20:41:40 2022 +0100
+++ b/src/Configuration.h	Tue Dec 13 02:07:00 2022 +0100
@@ -18,7 +18,9 @@
 
 #include <vector>
 #include <iostream>
+#include <regex>
 
+#include <relpipe/common/type/typedefs.h>
 #include <relpipe/reader/typedefs.h>
 
 
@@ -30,13 +32,31 @@
 public:
 
 	enum class InsertMode {
+		DEFAULT,
 		SINGLE,
 		MULTI,
 	};
 
-	relpipe::reader::boolean_t writeHeader = true;
-	relpipe::reader::boolean_t writeTypes = false;
-	InsertMode insertMode = InsertMode::MULTI; // TODO: per-relation mode
+	class TypeCastRule {
+	public:
+
+		std::wregex attribute = std::wregex(L".*");
+		std::wregex type = std::wregex(L".*");
+		relpipe::common::type::StringX sqlType;
+	};
+
+	class RelationConfiguration {
+	public:
+		relpipe::common::type::StringX relation;
+		std::wregex relationPattern;
+		relpipe::common::type::Boolean writeDDL = true;
+		relpipe::common::type::Boolean writeDML = true;
+		relpipe::common::type::Boolean writeColumnNames = true;
+		std::vector<TypeCastRule> typeCastRules;
+		InsertMode insertMode = InsertMode::DEFAULT;
+	};
+
+	std::vector<RelationConfiguration> relationConfigurations;
 
 	virtual ~Configuration() {
 	}
--- a/src/SQLHandler.h	Sat Dec 10 20:41:40 2022 +0100
+++ b/src/SQLHandler.h	Tue Dec 13 02:07:00 2022 +0100
@@ -48,7 +48,7 @@
 	std::vector<AttributeMetadata> currentAttributes;
 	integer_t valueCount = 0;
 	integer_t recordCount = 0;
-	string_t currentTable;
+	string_t currentRelation;
 
 	/**
 	 * @param a
@@ -70,6 +70,11 @@
 		output << '"';
 	}
 
+	static void writeType(std::ostream& output, std::string type) {
+		if (std::regex_match(type, std::regex("[a-z0-9]+(\\([0-9]+(,\\s*[0-9]+)*\\))?"))) output << type;
+		else writeIdentifier(output, type);
+	}
+
 	static void writeValue(std::ostream& output, std::string value) {
 		output << '\'';
 		for (auto & ch : value) {
@@ -85,12 +90,68 @@
 	}
 
 	void endRelation() {
+		if (getWriteDML()) {
+			if (getInsertMode() == Configuration::InsertMode::MULTI) {
+				output << std::endl << ";" << std::endl;
+			}
 
-		if (configuration.insertMode == Configuration::InsertMode::MULTI) {
-			output << std::endl << ";" << std::endl;
+			writeRecordCount();
+		}
+	}
+
+	// TODO: code deduplication
+
+	Configuration::InsertMode getInsertMode() {
+		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+			if (std::regex_match(currentRelation, rc.relationPattern)) {
+				if (rc.insertMode != Configuration::InsertMode::DEFAULT) return rc.insertMode;
+			}
+		}
+		return Configuration::InsertMode::MULTI;
+	}
+
+	// TODO: code deduplication
+
+	bool getWriteDDL() {
+		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+			if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDDL;
+		}
+		return true;
+	}
+
+	// TODO: code deduplication
+
+	bool getWriteDML() {
+		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+			if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeDML;
+		}
+		return true;
+	}
+
+	// TODO: code deduplication
+
+	bool getWriteColumnNames() {
+		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+			if (std::regex_match(currentRelation, rc.relationPattern)) return rc.writeColumnNames;
+		}
+		return true;
+	}
+
+	string_t findSQLType(const AttributeMetadata attribute) {
+		// Type casts provided by the user:
+		for (const Configuration::RelationConfiguration& rc : configuration.relationConfigurations) {
+			if (std::regex_match(currentRelation, rc.relationPattern)) {
+				for (const Configuration::TypeCastRule tcr : rc.typeCastRules) {
+					if (std::regex_match(attribute.getAttributeName(), tcr.attribute) && std::regex_match(attribute.getTypeName(), tcr.type)) return tcr.sqlType;
+				}
+			}
 		}
 
-		writeRecordCount();
+		// Default mapping:
+		if (attribute.getTypeId() == reader::TypeId::BOOLEAN) return L"integer";
+		else if (attribute.getTypeId() == reader::TypeId::INTEGER) return L"bigint";
+		else if (attribute.getTypeId() == reader::TypeId::STRING) return L"text";
+		else throw RelpipeSQLWriterException(L"Type not yet supported in the SQL output: " + attribute.getTypeName());
 	}
 
 public:
@@ -100,9 +161,7 @@
 
 	void startRelation(string_t name, std::vector<AttributeMetadata> attributes) override {
 		// TODO: ALTER TABLE / add columns on duplicate relation name
-		// TODO: optionally omit CREATE/ALTER table (just INSERT)
 		// TODO: optional transformation to upper/lower case
-		// TODO: custom data type mapping
 		// TODO: custom primary key or other column properties
 		// TODO: custom table properties
 		// TODO: custom SQL script before/after stream/relation/record
@@ -112,82 +171,82 @@
 		// TODO: optional syntax highlighting?
 		// TODO: share code/behavior with relpipe-tr-sql (but it uses parametrized statements)
 
-		if (currentTable.size()) {
+		if (currentRelation.size()) {
 			endRelation();
-			output << std::endl;
+			if (getWriteDDL() || getWriteDML()) output << std::endl;
 		}
 
-		currentTable = name;
+		currentRelation = name;
 		currentAttributes = attributes;
 		recordCount = 0;
 		valueCount = 0;
 
-		output << "CREATE TABLE ";
-		writeIdentifier(output, convertor.to_bytes(currentTable));
-		output << " (" << std::endl;
-		for (size_t i = 0, limit = attributes.size(); i < limit; i++) {
-			auto attribute = attributes[i];
-			output << "\t";
-			writeIdentifier(output, convertor.to_bytes(attribute.getAttributeName()));
-			// TODO: support all data types + implement RelationalReaderValueHandler
-			output << " TEXT";
-			if (i < (limit - 1)) output << ",";
-			output << std::endl;
+		if (getWriteDDL()) {
+			output << "CREATE TABLE ";
+			writeIdentifier(output, convertor.to_bytes(currentRelation));
+			output << " (" << std::endl;
+			for (size_t i = 0, limit = attributes.size(); i < limit; i++) {
+				auto attribute = attributes[i];
+				output << "\t";
+				writeIdentifier(output, convertor.to_bytes(attribute.getAttributeName()));
+				// TODO: implement RelationalReaderValueHandler
+				output << " ";
+				writeType(output, convertor.to_bytes(findSQLType(attribute)));
+				if (i < (limit - 1)) output << ",";
+				output << std::endl;
 
-		}
-		output << ");" << std::endl << std::endl;
-
-		if (currentAttributes.empty()) {
-			//if (configuration.writeHeader) for (auto attr : attributes) attribute(configuration.writeTypes ? attr.getAttributeName() + L"::" + attr.getTypeName() : attr.getAttributeName());
-		} else if (matches(currentAttributes, attributes)) {
-			// do UNION ALL – just append the records
-		} else {
-			// throw RelpipeSQLWriterException(L"To the SQL format we can convert only one relation or multiple relations that have same number of attributes of same types (relation and attribute names may differ – result is named after the first one).");
+			}
+			output << ");" << std::endl;
+			if (getWriteDML()) output << std::endl;
 		}
 	}
 
 	void attribute(const string_t& value) override {
+		if (getWriteDML() == false) return;
 
 		if (valueCount % currentAttributes.size() == 0) {
 			// TODO: optional use of function/procedure instead of INSERT
-			// TODO: optional INSERT of multiple records
 			// TODO: custom line-ends + indentation
-			// TODO: optionally write also the column names
 			recordCount++;
 
-			if (configuration.insertMode == Configuration::InsertMode::SINGLE) {
+			auto insertMode = getInsertMode();
+			if (insertMode == Configuration::InsertMode::SINGLE) {
 				output << "INSERT INTO ";
-				writeIdentifier(output, convertor.to_bytes(currentTable));
+				writeIdentifier(output, convertor.to_bytes(currentRelation));
 
-				output << " (";
-				for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
-					writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
-					if (i < (limit - 1)) output << ", ";
-				}
-				output << ")";
-
-				output << " VALUES (";
-			} else if (configuration.insertMode == Configuration::InsertMode::MULTI) {
-				if (recordCount == 1) {
-					// --------
-					output << "INSERT INTO ";
-					writeIdentifier(output, convertor.to_bytes(currentTable));
-
-					output << "\n\t(";
+				if (getWriteColumnNames()) {
+					output << " (";
 					for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
 						writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
 						if (i < (limit - 1)) output << ", ";
 					}
 					output << ")";
-					// --------
+				}
+
+				output << " VALUES (";
+			} else if (insertMode == Configuration::InsertMode::MULTI) {
+				if (recordCount == 1) {
+					output << "INSERT INTO ";
+					writeIdentifier(output, convertor.to_bytes(currentRelation));
 
-					output << std::endl << "VALUES" << std::endl;
+					if (getWriteColumnNames()) {
+						output << "\n\t(";
+						for (size_t i = 0, limit = currentAttributes.size(); i < limit; i++) {
+							writeIdentifier(output, convertor.to_bytes(currentAttributes[i].getAttributeName()));
+							if (i < (limit - 1)) output << ", ";
+						}
+						output << ")" << std::endl;
+					} else {
+						output << " ";
+					}
+
+					output << "VALUES" << std::endl;
 				} else {
 					output << "," << std::endl;
 				}
 				output << "\t(";
 			} else {
-				throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) configuration.insertMode));
+				throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
 			}
 		}
 
@@ -204,19 +263,20 @@
 		if (valueCount % currentAttributes.size()) {
 			output << ", ";
 		} else {
-			if (configuration.insertMode == Configuration::InsertMode::SINGLE) {
+			auto insertMode = getInsertMode();
+			if (insertMode == Configuration::InsertMode::SINGLE) {
 				output << ");" << std::endl;
-			} else if (configuration.insertMode == Configuration::InsertMode::MULTI) {
+			} else if (insertMode == Configuration::InsertMode::MULTI) {
 				output << ")";
 			} else {
-				throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) configuration.insertMode));
+				throw RelpipeSQLWriterException(L"Unsupported InsertMode: " + std::to_wstring((int) insertMode));
 			}
 			valueCount = 0;
 		}
 	}
 
 	void endOfPipe() {
-		if (currentTable.size()) {
+		if (currentRelation.size()) {
 			endRelation();
 		}
 		output.flush();