src/SqlInputScanner.h
author František Kučera <franta-hg@frantovo.cz>
Thu, 04 Jun 2020 00:46:00 +0200
branch v_0
changeset 47 428c278af4be
parent 33 86ceb97db7de
permissions -rw-r--r--
rename option --data-source-url to --data-source-string

In some implementations like JDBC, the connection string is a URL, but in ODBC the string is not formally a URL, so it is better to use the more general term „data source string“ instead of URL.

- data source name (DSN) = name of a pre-configured database connection that should be looked up in the configuration and used
- data source string (connection string) = arbitrary string containing (in a certain encoding, which might or might not be a URL) all needed parameters (e.g. server name + port + user name + password)

Name and string might sometimes also be combined: in ODBC we can e.g. connect to a string:
	DSN=relpipe;someParameter=foo;someOther=bar
which will look up the configuration for the „relpipe“ data source and combine it with the given parameters.

/**
 * Relational pipes
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <sstream>
#include <stdexcept>
#include <string>

namespace relpipe {
namespace tr {
namespace sql {

/**
 * Splits an SQL script into particular SQL statements.
 *
 * An SQL script may consist of several SQL statements separated by the ";" character.
 * But this character may also occur in string literals, quoted identifiers or comments,
 * where it does not separate SQL statements.
 * This scanner reads the SQL script character by character, tracks in which part of the
 * script it currently is (see enum State) and reports only those ";" that occur in plain SQL code.
 *
 * A quote doubled inside a literal or quoted identifier (e.g. 'it''s') is seen as closing
 * the value and immediately opening a new one, so a ";" after it is still treated as quoted.
 *
 * Usage:
 * Append characters using append() and when it returns true,
 * call getAndReset() and use the returned statement.
 * If there are any remaining characters on the input, start again with appending.
 * After the end of the input, getAndReset() returns the remainder that was not terminated by ";".
 */
class SqlInputScanner {
private:

	enum class State {
		/** SQL code like keywords, functions, operators or identifiers (not quoted ones) */
		SQL,
		/** classic SQL comment: -- terminated by the line end */
		COMMENT_SINGLE,
		/** comment like this one */
		COMMENT_MULTI,
		/** quoted identifier like "column_name" */
		IDENTIFIER,
		/** string literal like 'some value' */
		STRING_VALUE
	};

	/** part of the SQL script in which the scanner currently is */
	State state = State::SQL;
	/** whether the ";" separators are stored in the buffer, see setIncludeSeparators() */
	bool includeSeparators = true;
	/** buffer with the SQL statement that is being accumulated */
	std::wstringstream current;
	/**
	 * Previous character; used for recognizing two-character delimiters.
	 * It is L'\0' at the beginning and right after a multi-line comment delimiter was consumed.
	 */
	wchar_t last = L'\0';

public:
	/**
	 * Adds one character to the accumulated SQL statement and updates the scanner state.
	 *
	 * @param ch next character from the input stream
	 * @return whether the accumulated SQL statement is complete – then call getAndReset(), otherwise continue appending
	 * @throws std::domain_error if the scanner is in an unknown state (which would be a bug in this class)
	 */
	bool append(wchar_t ch);

	/**
	 * Returns current buffer and empties it, so append() can be called again (building new SQL statement).
	 * Only the buffer is emptied; the scanner state (e.g. being inside a comment) is kept as is.
	 *
	 * @return accumulated SQL statement
	 */
	std::wstring getAndReset();

	/**
	 * @param includeSeparators whether ";" separators should be appended to the buffer
	 * and then returned from getAndReset() as part of the SQL statement
	 */
	void setIncludeSeparators(bool includeSeparators) {
		this->includeSeparators = includeSeparators;
	}
};

// Both member functions are defined in this header, so they must be inline;
// otherwise including the header from several translation units leads to duplicate symbols.

inline bool SqlInputScanner::append(wchar_t ch) {
	// Only a ";" in plain SQL code terminates the statement.
	// None of the transitions below is triggered by ";", so this can be decided up front.
	const bool separator = state == State::SQL && ch == L';';

	// Everything is buffered, except the separator when the caller does not want it.
	if (!separator || includeSeparators) current << ch;

	// Usually the current character becomes the "last" one for the next call.
	// Characters that were consumed as a part of a multi-line comment delimiter are forgotten,
	// so they can not be reused as a part of another delimiter:
	// "/*/" is not a complete comment and the "/" of "*/" followed by "*" does not open a new comment.
	wchar_t nextLast = ch;

	switch (state) {
		case State::SQL:
			if (ch == L'"') {
				state = State::IDENTIFIER;
			} else if (ch == L'\'') {
				state = State::STRING_VALUE;
			} else if (ch == L'-' && last == L'-') {
				state = State::COMMENT_SINGLE;
			} else if (ch == L'*' && last == L'/') {
				state = State::COMMENT_MULTI;
				nextLast = L'\0';
			}
			break;
		case State::COMMENT_SINGLE:
			if (ch == L'\n') state = State::SQL;
			break;
		case State::COMMENT_MULTI:
			if (ch == L'/' && last == L'*') {
				state = State::SQL;
				nextLast = L'\0';
			}
			break;
		case State::IDENTIFIER:
			if (ch == L'"') state = State::SQL;
			break;
		case State::STRING_VALUE:
			if (ch == L'\'') state = State::SQL;
			break;
		default:
			throw std::domain_error("Unsupported SqlInputScanner state (bug in code)");
	}

	last = nextLast;
	return separator;
}

inline std::wstring SqlInputScanner::getAndReset() {
	std::wstring statement = current.str();
	current.str(L""); // drop the buffered characters
	current.clear(); // and reset the stream flags, so the stream can be written again
	return statement;
}

}
}
}