src/SqlInputScanner.h
author František Kučera <franta-hg@frantovo.cz>
Sat, 21 Aug 2021 23:13:08 +0200
branchv_0
changeset 55 3a120e64bb37
parent 33 86ceb97db7de
permissions -rw-r--r--
add boolean parameter to --list-data-sources + allow listing and SELECTing at the same time: bash-completion.sh

/**
 * Relational pipes
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <sstream>
#include <stdexcept>
#include <string>

namespace relpipe {
namespace tr {
namespace sql {

/**
 * An SQL script may consist of several SQL statements separated by the ";" character.
 * But this character may also occur in literal values, quoted identifiers or comments,
 * where it does not separate SQL statements.
 * This scanner reads an SQL script character by character and separates particular SQL statements.
 * It recognizes comments, string literals and quoted identifiers (see enum State),
 * so it is able to distinguish the ";" that separates SQL statements from other ";" that should be ignored.
 *
 * Usage:
 * Append characters using append() and when it returns true,
 * call getAndReset() and use the returned statement.
 * If there are any remaining characters on the input, start again with appending…
 * Characters left in the buffer at the end of the input (a statement without the trailing ";")
 * can also be obtained through getAndReset().
 *
 * Limitations:
 * A doubled quote inside a literal or quoted identifier ('it''s') is seen as the end of one quoted part
 * immediately followed by the start of another one, which gives correct statement boundaries.
 * Backslash escapes and nested multi-line comments are not recognized.
 */
class SqlInputScanner {
private:

	enum class State {
		/** SQL code like keywords, functions, operators or identifiers (not quoted ones) */
		SQL,
		/** classic SQL comment: -- terminated by the line end */
		COMMENT_SINGLE,
		/** multi-line comment delimited by slash-asterisk and asterisk-slash (like this one) */
		COMMENT_MULTI,
		/** quoted identifier like "column_name" */
		IDENTIFIER,
		/** string literal like 'some value' */
		STRING_VALUE
	};

	/** part of the SQL script in which the scanner currently is */
	State state = State::SQL;
	/** whether the ";" separators are copied to the buffer */
	bool includeSeparators = true;
	/** buffer with the characters of the statement being accumulated */
	std::wstringstream current;
	/**
	 * Previously appended character, used for recognizing two-character delimiters.
	 * It is L'\0' at the beginning and right after each change of the state,
	 * so a character that was already consumed as a part of one delimiter is never reused for another one.
	 */
	wchar_t last = L'\0';

public:
	/**
	 * @param ch next character from the input stream
	 * @return whether the accumulated SQL statement is complete – then call getAndReset(), otherwise continue appending
	 * @throws std::domain_error if the scanner is in an unknown state (would mean a bug in this class)
	 */
	bool append(wchar_t ch);

	/**
	 * Returns current buffer and empties it, so append() can be called again (building new SQL statement).
	 * Only the buffer is emptied; the scanner state is kept as it is.
	 * @return accumulated SQL statement
	 */
	std::wstring getAndReset();

	/**
	 * @param includeSeparators whether ";" separators should be appended to the buffer
	 * and then returned from getAndReset() as part of the SQL statement
	 */
	void setIncludeSeparators(bool includeSeparators) {
		this->includeSeparators = includeSeparators;
	}
};

// Defined as inline, because this header may be included from several translation units.

inline bool SqlInputScanner::append(wchar_t ch) {
	const State previousState = state;

	// Only a ";" met in plain SQL code terminates a statement; it never changes the state.
	const bool isSeparator = state == State::SQL && ch == L';';

	// Everything is buffered, except the separator when the caller does not want it.
	if (!isSeparator || includeSeparators) current << ch;

	switch (state) {
		case State::SQL:
			if (ch == L'"') state = State::IDENTIFIER;
			else if (ch == L'\'') state = State::STRING_VALUE;
			else if (ch == L'-' && last == L'-') state = State::COMMENT_SINGLE;
			else if (ch == L'*' && last == L'/') state = State::COMMENT_MULTI;
			break;
		case State::COMMENT_SINGLE:
			if (ch == L'\n') state = State::SQL;
			break;
		case State::COMMENT_MULTI:
			if (ch == L'/' && last == L'*') state = State::SQL;
			break;
		case State::IDENTIFIER:
			if (ch == L'"') state = State::SQL;
			break;
		case State::STRING_VALUE:
			if (ch == L'\'') state = State::SQL;
			break;
		default:
			throw std::domain_error("Unsupported SqlInputScanner state (bug in code)");
	}

	// Forget the character that completed a delimiter: otherwise the "*" of an opening delimiter
	// followed by "/" would immediately close the comment, and the "/" of a closing delimiter
	// followed by "*" (a multiplication) would open a new one.
	last = (state == previousState) ? ch : L'\0';

	return isSeparator;
}

inline std::wstring SqlInputScanner::getAndReset() {
	std::wstring str = current.str();
	current.str(L"");
	current.clear(); // also drop any error flags of the stream
	return str;
}

}
}
}