src/RecfileCommand.h
author František Kučera <franta-hg@frantovo.cz>
Sat, 06 Apr 2019 18:25:14 +0200
branchv_0
changeset 2 2390e2949a36
parent 1 8dfb42e5c088
child 3 891fe13d7397
permissions -rw-r--r--
parse recfile: separate methods

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <codecvt>
#include <iostream>
#include <locale>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <relpipe/writer/typedefs.h>

namespace relpipe {
namespace in {
namespace recfile {

using namespace relpipe::writer;

class RecfileCommand {
private:

	enum class RecfileLineType {
		METADATA,
		DATA,
		SEPARATOR,
		COMMENT,
		END,
	};

	/**
	 * Receiver of the logical lines recognized by the parser.
	 *
	 * At the moment every event is only traced to std::wcerr; the writer is stored
	 * for the future relational output (see the TODOs in logicalLine()) and is not used yet.
	 */
	class RecfileHandler {
	public:

		/**
		 * @param writer relational writer kept for later use; this class never deletes it
		 */
		RecfileHandler(RelationalWriter* writer) : writer(writer) {
		}

		virtual ~RecfileHandler() {
		}

		/**
		 * Entry point for the parser: routes one logical line to the handler method matching its type.
		 *
		 * @param type kind of the logical line
		 * @param name field name; passed on for METADATA and DATA lines only
		 * @param value field value or comment text; not used for SEPARATOR and END
		 */
		void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") {
			// TODO: writer->startRelation()
			// TODO: writer->writeAttribute()
			switch (type) {
				case RecfileLineType::METADATA:
					metadata(name, value);
					break;
				case RecfileLineType::DATA:
					data(name, value);
					break;
				case RecfileLineType::COMMENT:
					comment(value);
					break;
				case RecfileLineType::SEPARATOR:
					separator();
					break;
				case RecfileLineType::END:
					end();
					break;
			}
		}

	private:
		RelationalWriter* writer;

		/** Called for a METADATA line. */
		void metadata(const string_t& name, const string_t& value) {
			std::wcerr << L"metadata(" << name << L", " << value << L");" << std::endl; // TODO: remove debug
		}

		/** Called for a DATA line. */
		void data(const string_t& name, const string_t& value) {
			std::wcerr << L"data(" << name << L", " << value << L");" << std::endl; // TODO: remove debug
		}

		/** Called for a COMMENT line; value is the comment text. */
		void comment(const string_t& value) {
			std::wcerr << L"comment(" << value << L");" << std::endl; // TODO: remove debug
		}

		/** Called for a SEPARATOR (empty) line. */
		void separator() {
			std::wcerr << L"separator()" << std::endl; // TODO: remove debug
		}

		/** Called for the END event. */
		void end() {
			std::wcerr << L"end()" << std::endl; // TODO: remove debug
		}

	};

	/**
	 * States of the character-driven recfile parser.
	 * The numeric value of a state is printed in the parser's "Unknown ParserState" exception message,
	 * so the order of the enumerators should be kept.
	 */
	enum class ParserState {
		/** at the beginning of a logical line, before its type is known */
		START,
		/** collecting the field name up to the ':' */
		NAME,
		/** collecting the value up to the end of the physical line */
		VALUE,
		/** just after the newline that ended a value line; a '+' here continues the value */
		VALUE_CONTINUATION,
		/** collecting the text of a comment up to the end of the line */
		COMMENT,
		/** stops the parsing loop (checked in the loop condition) */
		END,
	};

	class RecfileParser {
	private:
		wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or are recfiles always in UTF-8?
		RecfileHandler& handler;

		void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) {
			handler.logicalLine(type, convertor.from_bytes(name.str()), convertor.from_bytes(value.str()));

			name.str("");
			name.clear();
			value.str("");
			value.clear();
			type = RecfileLineType::DATA;
		}

	public:

		RecfileParser(RecfileHandler& handler) : handler(handler) {
		}

		virtual ~RecfileParser() {
		}

		void parse(std::istream& input) {

			ParserState state = ParserState::START;
			RecfileLineType type = RecfileLineType::DATA;
			std::stringstream name;
			std::stringstream value;
			char ch;

			while (state != ParserState::END && input.good()) {
				ch = input.get();
				if (input.eof()) continue;

				switch (state) {
					case ParserState::START:
						if (ch == '%') {
							type = RecfileLineType::METADATA;
							break;
						} else if (ch == ' ') {
							break;
						} else if (ch == '\n') {
							handler.logicalLine(RecfileLineType::SEPARATOR);
							break;
						} else if (ch == '#') {
							type = RecfileLineType::COMMENT;
							state = ParserState::COMMENT;
							if (input.get() != ' ') input.unget();
							break;
						} // else → name
					case ParserState::NAME:
						if (ch == ':') {
							state = ParserState::VALUE;
							if (input.get() != ' ') input.unget();
						} else {
							name << ch;
						}
						break;
					case ParserState::VALUE:
						if (ch == '\n') state = ParserState::VALUE_CONTINUATION;
						else value << ch;
						break;
					case ParserState::VALUE_CONTINUATION:
						if (ch == '+') {
							state = ParserState::VALUE;
							if (value.tellp()) value << '\n';
							if (input.get() != ' ') input.unget();
						} else {
							input.unget();
							state = ParserState::START;
							emitLogicalLine(type, name, value);
						}
						break;
					case ParserState::COMMENT:
						if (ch == '\n') {
							state = ParserState::START;
							emitLogicalLine(type, name, value);
						} else {
							value << ch;
						}
						break;
					default:
						throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring((int) state) + L" in RecfileParser."); // TODO: better exception
				}
			}
			emitLogicalLine(type, name, value);
			handler.logicalLine(RecfileLineType::END);
		}
	};

public:

	void process(std::istream& input, std::ostream& output) {
		unique_ptr<RelationalWriter> writer(Factory::create(output));
		RecfileHandler handler(writer.get());
		RecfileParser parser(handler);
		parser.parse(input);
	}
};

}
}
}