SqlInputScanner for parsing SQL script and separating particular statements; does not depend on sqlite3_complete() v_0
author František Kučera <franta-hg@frantovo.cz>
Sat, 23 May 2020 23:58:50 +0200
branch v_0
changeset 33 86ceb97db7de
parent 32 77180ee275df
child 34 24c05e69d68f
SqlInputScanner for parsing SQL script and separating particular statements; does not depend on sqlite3_complete()
src/PreparedStatement.cpp
src/PreparedStatement.h
src/SqlHandler.h
src/SqlInputScanner.h
--- a/src/PreparedStatement.cpp	Fri May 08 12:51:01 2020 +0200
+++ b/src/PreparedStatement.cpp	Sat May 23 23:58:50 2020 +0200
@@ -101,10 +101,6 @@
 	return value ? value : ""; // TODO: support NULL values (when supported in relpipe format)
 }
 
-bool PreparedStatement::isComplete(const char* sql) {
-	return sqlite3_complete(sql);
-}
-
 }
 }
 }
--- a/src/PreparedStatement.h	Fri May 08 12:51:01 2020 +0200
+++ b/src/PreparedStatement.h	Sat May 23 23:58:50 2020 +0200
@@ -41,7 +41,6 @@
 	std::string getColumName(int columnIndex);
 	relpipe::writer::TypeId getColumType(int columnIndex, relpipe::writer::TypeId defaultType = relpipe::writer::TypeId::STRING);
 	std::string getString(int columnIndex);
-	static bool isComplete(const char *sql); // TODO: use own implementation + move to a separate class
 };
 
 }
--- a/src/SqlHandler.h	Fri May 08 12:51:01 2020 +0200
+++ b/src/SqlHandler.h	Sat May 23 23:58:50 2020 +0200
@@ -36,6 +36,7 @@
 
 #include "Configuration.h"
 #include "SqlException.h"
+#include "SqlInputScanner.h"
 #include "PreparedStatement.h"
 #include "Connection.h"
 
@@ -63,12 +64,16 @@
 		sql->str(L"");
 		sql->clear();
 
+		SqlInputScanner scanner;
+
 		for (wchar_t ch; *input >> ch;) {
-			*sql << ch;
-			if (ch == L';' && PreparedStatement::isComplete(convertor.to_bytes(sql->str()).c_str())) return true;
+			if (scanner.append(ch)) {
+				*sql << scanner.getAndReset().c_str();
+				return true;
+			}
 		}
 
-		string_t remainingSql = sql->str();
+		string_t remainingSql = scanner.getAndReset();
 		for (wchar_t ch : remainingSql) if (ch != L' ' && ch != L'\n' && ch != L'\r' && ch != L'\t') throw SqlException(L"Unexpected EOF, missing „;“ after: „" + remainingSql + L"“");
 
 		return false;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SqlInputScanner.h	Sat May 23 23:58:50 2020 +0200
@@ -0,0 +1,115 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace relpipe {
+namespace tr {
+namespace sql {
+
+/**
+ * An SQL script may consist of several SQL statements separated by „;“ character.
+ * But this character may also occur in literal values or quoted identifiers where it does not separate SQL statements.
+ * This scanner reads SQL script character by character and separates particular SQL statements.
+ * It recognizes comments, literals, quoted values and other parts (see enum State),
+ * so it is able to distinguish which „;“ separates SQL statements from other „;“ that should be ignored.
+ * 
+ * Usage:
+ * Append characters using append() and when it returns true,
+ * call getAndReset() and use the returned statement.
+ * If there are any remaining characters on the input, start again with appending…
+ */
+class SqlInputScanner {
+private:
+
+	enum class State {
+		/** SQL code like keywords, functions, operators or identifiers (not quoted ones) */
+		SQL,
+		/** classic SQL comment: starts with -- and is terminated by the line end */
+		COMMENT_SINGLE,
+		/** block comment delimited by slash-asterisk and asterisk-slash (like this one) */
+		COMMENT_MULTI,
+		/** quoted identifier like "column_name" */
+		IDENTIFIER,
+		/** string literal like 'some value' */
+		STRING_VALUE
+	};
+
+	State state = State::SQL; // lexical context after the most recently appended character
+	bool includeSeparators = true; // see setIncludeSeparators()
+	std::wstringstream current; // characters of the statement that is being accumulated
+	wchar_t last = L'\0'; // previously appended character; needed to recognize two-character comment delimiters
+
+public:
+	/**
+	 * @param ch next character from the input stream
+	 * @return whether the accumulated SQL statement is complete – then call getAndReset(), otherwise continue appending
+	 */
+	bool append(wchar_t ch);
+
+	/**
+	 * Returns current buffer and empties it, so append() can be called again (building new SQL statement)
+	 * @return accumulated SQL statement
+	 */
+	std::wstring getAndReset();
+
+	/**
+	 * @param includeSeparators whether „;“ separators should be appended to the buffer
+	 * and then returned from getAndReset() as part of the SQL statement
+	 */
+	void setIncludeSeparators(bool includeSeparators) {
+		this->includeSeparators = includeSeparators;
+	}
+};
+
+inline bool SqlInputScanner::append(wchar_t ch) { // inline: this header may be included from several translation units
+	// A „;“ in a comment, literal or quoted identifier is always buffered; a separating „;“ only if includeSeparators is set.
+	if (includeSeparators || state != State::SQL || ch != L';') current << ch;
+
+	const State previous = state;
+	switch (state) {
+		case State::SQL:
+			if (ch == L'"') state = State::IDENTIFIER;
+			else if (ch == L'\'') state = State::STRING_VALUE;
+			else if (ch == L'-' && last == L'-') state = State::COMMENT_SINGLE;
+			else if (ch == L'*' && last == L'/') state = State::COMMENT_MULTI;
+			break;
+		case State::COMMENT_SINGLE: if (ch == L'\n') state = State::SQL; break;
+		case State::COMMENT_MULTI: if (ch == L'/' && last == L'*') state = State::SQL; break;
+		case State::IDENTIFIER: if (ch == L'"') state = State::SQL; break;
+		case State::STRING_VALUE: if (ch == L'\'') state = State::SQL; break;
+		default: throw std::domain_error("Unsupported SqlInputScanner state (bug in code)");
+	}
+
+	// A character that switched the state was consumed as a delimiter and must not pair with the next one: slash-asterisk-slash is not a closed comment and an asterisk right after a closed comment does not open a new one.
+	last = (state == previous) ? ch : L'\0';
+	return state == State::SQL && ch == L';'; // true only for a „;“ seen in plain SQL code, i.e. a statement separator
+}
+
+inline std::wstring SqlInputScanner::getAndReset() { // inline: this header may be included from several translation units
+	std::wstring str = current.str(); // snapshot of everything buffered since the previous call
+	current.str(L""); // drop the buffered characters…
+	current.clear(); // …and clear the stream state flags; the scanner state and the last character are left untouched
+	return str;
+}
+
+}
+}
+}
\ No newline at end of file