src/YAMLCommand.h
author František Kučera <franta-hg@frantovo.cz>
Thu, 03 Dec 2020 22:31:03 +0100
branch v_0
changeset 2 d68192f0e960
parent 1 8d2d8f4077af
child 3 64f8f8792470
permissions -rw-r--r--
convert YAML to relations

Expected structure is:
    map:
        key = relation name
        value = sequence of maps (records) where:
            key = attribute name
            value = attribute value

Example:
--------
fruit:
  - id: 1
    name: cherry
  - id: 2
    name: apple
  - id: 3
    name: plum
empty: # this relation has no attributes and will be ignored
sparse:
  - a: aaa
    b: bbb
    c: ccc
  - a: AAA
    b: BBB
  - c: C
    d: D
    e: E
    a: A
    f: F
  - c: CCC
--------

Not yet supported:
--------
my_relation_with_metadata:
  attribute-metadata:
    - name: a
      type: string
    - name: b
      type: boolean
  record:
    - a: a
    - b: true
--------

/**
 * Relational pipes
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <codecvt>
#include <cstdlib>
#include <iostream>
#include <locale>
#include <memory>
#include <string>
#include <vector>

#include <yaml.h>

#include <relpipe/writer/RelationalWriter.h>
#include <relpipe/writer/typedefs.h>

namespace relpipe {
namespace in {
namespace yaml {

class YAMLCommand {
private:
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // YAML strings are in UTF-8

	class YAMLEvent {
	private:
		yaml_event_t event;
	public:

		YAMLEvent(yaml_event_t event) : event(event) {
		}

		virtual ~YAMLEvent() {
			yaml_event_delete(&event);
		}

		const yaml_event_type_t getType() const {
			return event.type;
		}

		const yaml_event_t* getEvent() const {
			return &event;
		}

		YAMLEvent(const YAMLEvent&) = delete;
		YAMLEvent& operator=(const YAMLEvent&) = delete;
	};

	class YAMLParser {
	private:
		yaml_parser_t yamlParser;
		std::istream* input = nullptr;

		static int readFromInput(void* instance, unsigned char* buffer, size_t size, size_t* length) {
			std::istream* input = ((YAMLParser*) instance)->input;
			input->read((char*) buffer, size);
			*length = input->gcount();
			return (input->good() || input->eof()) ? 1 : 0;
		}
	public:

		YAMLParser() {
			yaml_parser_initialize(&yamlParser);
		}

		virtual ~YAMLParser() {
			yaml_parser_delete(&yamlParser);
		}

		YAMLParser(const YAMLParser&) = delete;
		YAMLParser& operator=(const YAMLParser&) = delete;

		void setInput(std::istream* input) {
			this->input = input;
			yaml_parser_set_input(&yamlParser, readFromInput, (void*) this);
		}

		YAMLEvent* next() {
			yaml_event_t event;
			int result = yaml_parser_parse(&yamlParser, &event);
			return result == 1 && event.type != yaml_event_type_e::YAML_NO_EVENT ? new YAMLEvent(event) : nullptr; // 1 = OK in yaml.h; YAML_NO_EVENT = end
		}
	};

	YAMLParser parser;

	using YAMLEvent_p = std::shared_ptr<YAMLEvent>;

	enum class State {
		START,
		RELATIONS,
		ATTRIBUTE,
		RECORDS
	};

	State state;

	relpipe::writer::string_t y2s(yaml_char_t* value) {
		return value ? convertor.from_bytes((const char*) value) : L"";
	}

	relpipe::writer::string_t findValue(std::vector<relpipe::writer::string_t> record, relpipe::writer::string_t key) {
		if (record.size() % 2) relpipe::writer::RelpipeWriterException(L"Invalid count of values in the record vector");
		for (size_t i = 0; i < record.size(); i += 2) {
			if (record[i] == key) return record[i + 1];
		}
		return L"";
	}

public:

	YAMLCommand() {
	}

	virtual ~YAMLCommand() {
	}

	void process(std::istream& input, std::shared_ptr<relpipe::writer::RelationalWriter> writer) {
		parser.setInput(&input);

		relpipe::writer::string_t relationName;
		std::vector<relpipe::writer::string_t> record;
		std::vector<relpipe::writer::AttributeMetadata> attributesMetadata;

		state = State::START;

		for (YAMLEvent_p event = YAMLEvent_p(parser.next()); event; event = YAMLEvent_p(parser.next())) {
			if (event->getType() == YAML_NO_EVENT) {
				throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: YAML_NO_EVENT");
			} else if (event->getType() == YAML_STREAM_START_EVENT) {
			} else if (event->getType() == YAML_STREAM_END_EVENT) {
			} else if (event->getType() == YAML_DOCUMENT_START_EVENT) {
			} else if (event->getType() == YAML_DOCUMENT_END_EVENT) {
			} else if (event->getType() == YAML_ALIAS_EVENT) {
			} else if (event->getType() == YAML_SCALAR_EVENT) {
				relpipe::writer::string_t scalarValue = y2s(event->getEvent()->data.scalar.value);
				if (state == State::RELATIONS) relationName = scalarValue;
				else if (state == State::ATTRIBUTE) record.push_back(scalarValue);
				else throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_SCALAR_EVENT");
			} else if (event->getType() == YAML_SEQUENCE_START_EVENT) {
				if (state == State::RELATIONS) state = State::RECORDS;
				else throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_SEQUENCE_START_EVENT");
			} else if (event->getType() == YAML_SEQUENCE_END_EVENT) {
				if (state == State::RECORDS) {
					state = State::RELATIONS;
					relationName.clear();
					attributesMetadata.clear();
				} else {
					throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_SEQUENCE_END_EVENT");
				}
			} else if (event->getType() == YAML_MAPPING_START_EVENT) {
				if (state == State::START) state = State::RELATIONS;
				else if (state == State::RECORDS) state = State::ATTRIBUTE;
				else if (state == State::RELATIONS) throw relpipe::writer::RelpipeWriterException(L"Not yet implemented"); // TODO: there might be also a map in the relation value (not only a sequence) that would contain metadata (i.e. data types + support for relation containing no records)
				else throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_MAPPING_START_EVENT");
			} else if (event->getType() == YAML_MAPPING_END_EVENT) {
				if (state == State::ATTRIBUTE) {
					if (attributesMetadata.size() == 0) {
						if (record.size() % 2) relpipe::writer::RelpipeWriterException(L"Invalid count of values in the record vector");
						for (size_t i = 0; i < record.size(); i += 2) attributesMetadata.push_back(relpipe::writer::AttributeMetadata{record[i], relpipe::writer::TypeId::STRING});
						writer->startRelation(relationName, attributesMetadata, true);
					}
					for (auto m : attributesMetadata) writer->writeAttribute(findValue(record, m.attributeName));
					record.clear();
					state = State::RECORDS;
				} else if (state == State::RELATIONS) {
					break; // map of the relations ends (root)
				} else {
					throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_MAPPING_END_EVENT");
				}
			} else {
				throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unknown event: " + std::to_wstring(event->getType()));
			}
		}
	}
};

}
}
}