src/XMLDocumentConstructor.h
author František Kučera <franta-hg@frantovo.cz>
Wed, 28 Oct 2020 16:30:01 +0100
branch v_0
changeset 19 aabdab3e05a0
parent 18 737c507a3e32
child 21 053054f9f702
permissions -rw-r--r--
allow parsing a single scalar value (plain text) directly at the root, without any sequences or maps

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <codecvt>
#include <iostream>
#include <istream>
#include <string>
#include <vector>

#include <libxml++-2.6/libxml++/libxml++.h>
#include <yaml.h>

namespace relpipe {
namespace in {
namespace xmltable {

class XMLDocumentConstructor {
private:
	std::istream* input = nullptr;
	xmlpp::DomParser* parser = nullptr;
	yaml_parser_t yamlParser;

	enum class Mode {
		ROOT,
		SEQUENCE,
		MAPPING,
		MAP_KEY
	};

	xmlpp::Element* current;
	std::vector<Mode> mode;

	static int readFromInput(void* instance, unsigned char* buffer, size_t size, size_t* length) {
		std::istream* input = ((XMLDocumentConstructor*) instance)->input;
		input->read((char*) buffer, size);
		*length = input->gcount();
		return (input->good() || input->eof()) ? 1 : 0;
	}

	/**
	 * Both YAML and XML strings are in UTF-8.
	 */
	const char* y2x(yaml_char_t* value) {
		return value ? (const char*) value : "";
	}

	const std::string y2xname(yaml_char_t* value) {
		// FIXME: escaping, assure valid XML names
		//return std::string("name_") + y2x(value);
		return y2x(value);
	}

	xmlpp::Element* parentOrSelf(xmlpp::Element* current) {
		return current->get_parent() == nullptr ? current : current->get_parent();
	}

public:

	XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) {
		yaml_parser_initialize(&yamlParser);
		yaml_parser_set_input(&yamlParser, readFromInput, (void*) this);
	}

	virtual ~XMLDocumentConstructor() {
		yaml_parser_delete(&yamlParser);
	}

	void process() {
		current = parser->get_document()->create_root_node("yaml");
		mode.push_back(Mode::ROOT);
		std::string itemName;

		while (true) {
			yaml_event_t event;

			if (!yaml_parser_parse(&yamlParser, &event)) {
				// FIXME: throw exception
				std::wcerr << L"YAML error" << std::endl;
				return;
			}


			if (event.type == YAML_STREAM_END_EVENT) {
				std::cerr << "YAML_STREAM_END_EVENT" << std::endl;
				yaml_event_delete(&event);
				break;
			} else if (event.type == YAML_STREAM_START_EVENT) {
				std::cerr << "YAML_STREAM_START_EVENT" << std::endl;
			} else if (event.type == YAML_NO_EVENT) {
				std::cerr << "YAML_NO_EVENT" << std::endl;
				current->add_child("null"); // TODO: null?
			} else if (event.type == YAML_DOCUMENT_START_EVENT) {
				std::cerr << "YAML_DOCUMENT_START_EVENT" << std::endl;
			} else if (event.type == YAML_DOCUMENT_END_EVENT) {
				std::cerr << "YAML_DOCUMENT_END_EVENT" << std::endl;
			} else if (event.type == YAML_ALIAS_EVENT) {
				std::cerr << "YAML_ALIAS_EVENT" << std::endl;
				// TODO: alias?
			} else if (event.type == YAML_SCALAR_EVENT) {
				if (mode.back() == Mode::SEQUENCE) {
					std::cerr << "YAML_SCALAR_EVENT: Mode::SEQUENCE: " << event.data.scalar.value << std::endl;
					current->add_child(itemName)->add_child_text(y2x(event.data.scalar.value));
				} else if (mode.back() == Mode::MAPPING) {
					std::cerr << "YAML_SCALAR_EVENT: Mode::MAPPING: " << event.data.scalar.value << std::endl;
					current = current->add_child(y2xname(event.data.scalar.value));
					mode.push_back(Mode::MAP_KEY);
				} else if (mode.back() == Mode::MAP_KEY) {
					std::cerr << "YAML_SCALAR_EVENT: Mode::MAP_KEY: " << event.data.scalar.value << std::endl;
					current->add_child_text(y2x(event.data.scalar.value));
					current = parentOrSelf(current);
					mode.pop_back();
				} else if (mode.back() == Mode::ROOT) {
					current->add_child_text(y2x(event.data.scalar.value));
				} else {
					std::cerr << "YAML_SCALAR_EVENT: ???" << std::endl;
				}

			} else if (event.type == YAML_SEQUENCE_START_EVENT) {
				std::cerr << "YAML_SEQUENCE_START_EVENT" << std::endl;
				xmlpp::Element* parent = current->get_parent();
				if (parent) {
					itemName = current->get_name();
					parent->remove_child(current);
					current = parent;
				} else {
					itemName = "item";
				}
				if (mode.back() == Mode::MAP_KEY) mode.pop_back();
				mode.push_back(Mode::SEQUENCE);
			} else if (event.type == YAML_SEQUENCE_END_EVENT) {
				std::cerr << "YAML_SEQUENCE_END_EVENT" << std::endl;
				mode.pop_back(); // TODO: assert sequence?
			} else if (event.type == YAML_MAPPING_START_EVENT) {

				if (mode.back() == Mode::ROOT) {
					std::cerr << "YAML_MAPPING_START_EVENT: Mode:ROOT" << std::endl;
				} else if (mode.back() == Mode::SEQUENCE) {
					std::cerr << "YAML_MAPPING_START_EVENT: Mode::SEQUENCE" << std::endl;
					current = current->add_child(itemName);
				} else if (mode.back() == Mode::MAP_KEY) {
					std::cerr << "YAML_MAPPING_START_EVENT: Mode::MAP_KEY" << std::endl;
					mode.pop_back();
				} else {
					std::cerr << "YAML_MAPPING_START_EVENT: Mode::?" << std::endl;
					// TODO: map might be a key of another map → wrap/nest
				}

				mode.push_back(Mode::MAPPING);
			} else if (event.type == YAML_MAPPING_END_EVENT) {
				std::cerr << "YAML_MAPPING_END_EVENT" << std::endl;
				current = parentOrSelf(current);
				mode.pop_back(); // TODO: assert map?
			} else {
				std::cerr << "???" << std::endl;
			}

			yaml_event_delete(&event);
		}


	}
};

}
}
}