src/RecfileCommand.h
author František Kučera <franta-hg@frantovo.cz>
Sun, 07 Apr 2019 01:10:31 +0200
branchv_0
changeset 3 891fe13d7397
parent 2 2390e2949a36
child 4 b5239b4b345b
permissions -rw-r--r--
handler logic structure

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <codecvt>
#include <iostream>
#include <locale>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include <relpipe/writer/typedefs.h>
#include <relpipe/writer/RelationalWriter.h>
#include <relpipe/writer/AttributeMetadata.h>

namespace relpipe {
namespace in {
namespace recfile {

using namespace relpipe::writer;

class RecfileCommand {
private:

	/**
	 * Kind of a logical recfile line, as reported by the parser to
	 * RecfileHandler::logicalLine().
	 */
	enum class RecfileLineType {
		/** A field whose line started with '%'; dispatched to the handler's metadata(). */
		METADATA,
		/** An ordinary name/value field; dispatched to the handler's data(). */
		DATA,
		/** An empty line; dispatched to the handler's separator(). */
		SEPARATOR,
		/** A line that started with '#'; dispatched to the handler's comment(). */
		COMMENT,
		/** End of the input; dispatched to the handler's end(). */
		END
	};

	class RecfileHandler {
	private:
		RelationalWriter* writer;
		string_t currentRelationName;
		std::vector<string_t> currentRecord;
		std::vector<std::vector<string_t>> currentRecords;
		size_t prefetchCount = 1;
		bool headerWritten = false;

		void writeHeader() {
			if (headerWritten) return;

			std::vector<AttributeMetadata> attributeMetadata;

			// TODO: writer->startRelation(currentRelationName, attributeMetadata, true);

			headerWritten = true;
		}

		void writeRecords() {
			for (int i = 0; i < currentRecords.size(); i++) {
				std::vector<string_t> record = currentRecords[i];
				std::wcerr << L"record: " << std::endl;
				for (int j = 0; j < record.size(); j += 2) {
					std::wcerr << L"  '" << record[j] << L"': '" << record[j + 1] << L"'" << std::endl;
					// TODO: writer->writeAttribute()
				}
			}
			currentRecords.clear();
		}

		void metadata(const string_t& name, const string_t& value) {
			std::wcerr << L"metadata(" << name << L", " << value << L");" << std::endl; // TODO: remove debug

			if (name == L"rec") {
				currentRelationName = value;
				currentRecord.clear();
				currentRecords.clear();
				headerWritten = false;
			} else if (name == L"type") {
				// TODO: save type hint
			} else {
				// ignore – other recfile metadata like keys or auto-increments
			}
		}

		void data(const string_t& name, const string_t& value) {
			std::wcerr << L"data(" << name << L", " << value << L");" << std::endl; // TODO: remove debug
			currentRecord.push_back(name);
			currentRecord.push_back(value);
		}

		void comment(const string_t& value) {
			// ignore comments
		}

		void separator() {
			std::wcerr << L"separator()" << std::endl; // TODO: remove debug
			if (currentRecord.size()) {
				currentRecords.push_back(currentRecord);
				currentRecord.clear();
			}

			if (prefetchCount > 0 && currentRecords.size() >= prefetchCount) {
				writeHeader();
				writeRecords();
			}
		}

		void end() {
			std::wcerr << L"end();" << std::endl; // TODO: remove debug
			if (currentRecord.size()) currentRecords.push_back(currentRecord);
			writeHeader();
			writeRecords();
		}

	public:

		RecfileHandler(RelationalWriter* writer) : writer(writer) {
		}

		virtual ~RecfileHandler() {
		}

		void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") {
			switch (type) {
				case RecfileLineType::METADATA: return metadata(name, value);
				case RecfileLineType::DATA: return data(name, value);
				case RecfileLineType::COMMENT: return comment(value);
				case RecfileLineType::SEPARATOR: return separator();
				case RecfileLineType::END: return end();
			}
		}

	};

	/** States of the character-level state machine in RecfileParser::parse(). */
	enum class ParserState {
		/** At the beginning of a logical line. */
		START,
		/** Reading a field name, up to the ':' delimiter. */
		NAME,
		/** Reading a field value, up to the end of the line. */
		VALUE,
		/** Right after the newline that ended a value line; a '+' continues the value. */
		VALUE_CONTINUATION,
		/** Reading the text of a comment, up to the end of the line. */
		COMMENT,
		/** Stops the parsing loop. */
		END
	};

	class RecfileParser {
	private:
		wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or are recfiles always in UTF-8?
		RecfileHandler& handler;

		void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) {
			handler.logicalLine(type, convertor.from_bytes(name.str()), convertor.from_bytes(value.str()));

			name.str("");
			name.clear();
			value.str("");
			value.clear();
			type = RecfileLineType::DATA;
		}

	public:

		RecfileParser(RecfileHandler& handler) : handler(handler) {
		}

		virtual ~RecfileParser() {
		}

		void parse(std::istream& input) {

			ParserState state = ParserState::START;
			RecfileLineType type = RecfileLineType::DATA;
			std::stringstream name;
			std::stringstream value;
			char ch;

			while (state != ParserState::END && input.good()) {
				ch = input.get();
				if (input.eof()) continue;

				switch (state) {
					case ParserState::START:
						if (ch == '%') {
							type = RecfileLineType::METADATA;
							break;
						} else if (ch == ' ') {
							break;
						} else if (ch == '\n') {
							handler.logicalLine(RecfileLineType::SEPARATOR);
							break;
						} else if (ch == '#') {
							type = RecfileLineType::COMMENT;
							state = ParserState::COMMENT;
							if (input.get() != ' ') input.unget();
							break;
						} // else → name
					case ParserState::NAME:
						if (ch == ':') {
							state = ParserState::VALUE;
							if (input.get() != ' ') input.unget();
						} else {
							name << ch;
						}
						break;
					case ParserState::VALUE:
						if (ch == '\n') state = ParserState::VALUE_CONTINUATION;
						else value << ch;
						break;
					case ParserState::VALUE_CONTINUATION:
						if (ch == '+') {
							state = ParserState::VALUE;
							if (value.tellp()) value << '\n';
							if (input.get() != ' ') input.unget();
						} else {
							input.unget();
							state = ParserState::START;
							emitLogicalLine(type, name, value);
						}
						break;
					case ParserState::COMMENT:
						if (ch == '\n') {
							state = ParserState::START;
							emitLogicalLine(type, name, value);
						} else {
							value << ch;
						}
						break;
					default:
						throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring((int) state) + L" in RecfileParser."); // TODO: better exception
				}
			}
			emitLogicalLine(type, name, value);
			handler.logicalLine(RecfileLineType::END);
		}
	};

public:

	void process(std::istream& input, std::ostream& output) {
		unique_ptr<RelationalWriter> writer(Factory::create(output));
		RecfileHandler handler(writer.get());
		RecfileParser parser(handler);
		parser.parse(input);
	}
};

}
}
}