src/SqlInputScanner.h
branchv_0
changeset 33 86ceb97db7de
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SqlInputScanner.h	Sat May 23 23:58:50 2020 +0200
@@ -0,0 +1,115 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace relpipe {
+namespace tr {
+namespace sql {
+
+/**
+ * An SQL script may consist of several SQL statements separated by „;“ character.
+ * But this character may also occur in literal values or quoted identifiers where it does not separate SQL statements.
+ * This scanner reads SQL script character by character and separates particular SQL statements.
+ * It recognizes comments, literals, quoted values and other parts (see enum State),
+ * so it is able to distinguish which „;“ separates SQL statements from other „;“ that should be ignored.
+ * 
+ * Usage:
+ * Append characters using append() and when it returns true,
+ * call getAndReset() and use the returned statement.
+ * If there are any remaining characters on the input, start again with appending…
+ */
+class SqlInputScanner {
+public:
+	/**
+	 * Feeds one more character of the SQL script into the scanner.
+	 * @param ch next character from the input stream
+	 * @return true once the buffered SQL statement is complete (fetch it using getAndReset()), false if more input is needed
+	 */
+	bool append(wchar_t ch);
+
+	/**
+	 * Hands over the text buffered so far and empties the buffer, so the next SQL statement can be built using append().
+	 * @return accumulated SQL statement
+	 */
+	std::wstring getAndReset();
+
+	/**
+	 * @param includeSeparators true: the „;“ separators are put into the buffer and thus returned from getAndReset()
+	 * as part of the SQL statement; false: they are left out
+	 */
+	void setIncludeSeparators(bool includeSeparators) {
+		this->includeSeparators = includeSeparators;
+	}
+
+private:
+	enum class State {
+		/** plain SQL code: keywords, functions, operators or identifiers that are not quoted */
+		SQL,
+		/** single-line comment: starts with -- and lasts until the line end */
+		COMMENT_SINGLE,
+		/** multi-line comment, delimited the same way as this one */
+		COMMENT_MULTI,
+		/** identifier in double quotes, like "column_name" */
+		IDENTIFIER,
+		/** string literal in single quotes, like 'some value' */
+		STRING_VALUE
+	};
+
+	State state = State::SQL; // kind of text the scanner is currently inside; scanning starts in plain SQL
+	bool includeSeparators = true; // see setIncludeSeparators()
+	std::wstringstream current; // characters of the SQL statement that is being built
+	wchar_t last = L'\0'; // remembered by append() in order to recognize two-character comment delimiters
+};
+
+/** Buffers ch, advances the state machine and returns true when ch completed a statement; throws std::domain_error on an unknown state. */
+inline bool SqlInputScanner::append(wchar_t ch) {
+	const State previous = state;
+	const bool separator = previous == State::SQL && ch == L';';
+	if (!separator || includeSeparators) current << ch; // a statement-separating „;“ is buffered only on request
+
+	if (previous == State::SQL) {
+		if (ch == L'"') state = State::IDENTIFIER;
+		else if (ch == L'\'') state = State::STRING_VALUE;
+		else if (ch == L'-' && last == L'-') state = State::COMMENT_SINGLE;
+		else if (ch == L'*' && last == L'/') state = State::COMMENT_MULTI;
+	} else if (previous == State::COMMENT_SINGLE) {
+		if (ch == L'\n') state = State::SQL;
+	} else if (previous == State::COMMENT_MULTI) {
+		if (ch == L'/' && last == L'*') state = State::SQL;
+	} else if (previous == State::IDENTIFIER) {
+		if (ch == L'"') state = State::SQL;
+	} else if (previous == State::STRING_VALUE) {
+		if (ch == L'\'') state = State::SQL;
+	} else throw std::domain_error("Unsupported SqlInputScanner state (bug in code)");
+	last = (state == previous) ? ch : L'\0'; // a two-character delimiter must not span a state change: „/*/“ is not a whole comment
+	return separator; // no transition is triggered by „;“, so this equals: state == State::SQL && ch == L';'
+}
+
+inline std::wstring SqlInputScanner::getAndReset() { // inline: the definition lives in a header (one-definition rule)
+	std::wstring statement = current.str(); // copy of everything buffered so far – this is the return value
+	current.str(L""); // empty the buffer, so the next statement starts from scratch
+	current.clear(); // reset the error flags of the stream; state and last are not touched here
+	return statement;
+}
+
+}
+}
+}
\ No newline at end of file