src/XMLDocumentConstructor.h
author František Kučera <franta-hg@frantovo.cz>
Sat, 28 Nov 2020 23:52:01 +0100
branch v_0
changeset 25 0e8a58946c48
parent 24 ee72fccc5267
child 26 84ff7c97bfdc
permissions -rw-r--r--
add --parser-option

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <codecvt>
#include <istream>
#include <stdexcept>
#include <string>
#include <vector>

#include <libxml++-2.6/libxml++/libxml++.h>
#include <yaml.h>

#include "XMLNameCodec.h"

namespace relpipe {
namespace in {
namespace xmltable {

class XMLDocumentConstructor {
private:
	std::istream* input = nullptr;
	xmlpp::DomParser* parser = nullptr;
	yaml_parser_t yamlParser;
	XMLNameCodec nameCodec;

	enum class Mode {
		ROOT,
		SEQUENCE,
		MAPPING,
		MAP_KEY
	};

	xmlpp::Element* current;
	std::vector<Mode> mode;

	static int readFromInput(void* instance, unsigned char* buffer, size_t size, size_t* length) {
		std::istream* input = ((XMLDocumentConstructor*) instance)->input;
		input->read((char*) buffer, size);
		*length = input->gcount();
		return (input->good() || input->eof()) ? 1 : 0;
	}
	
	/**
	 * Both YAML and XML strings are in UTF-8.
	 */
	const char* y2x(yaml_char_t* value) {
		return value ? (const char*) value : "";
	}

	const Glib::ustring y2xname(yaml_char_t* value) {
		return nameCodec.encode(y2x(value));
	}

	xmlpp::Element* parentOrSelf(xmlpp::Element* current) {
		return current->get_parent() == nullptr ? current : current->get_parent();
	}

public:

	XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) {
		yaml_parser_initialize(&yamlParser);
		yaml_parser_set_input(&yamlParser, readFromInput, (void*) this);
	}

	virtual ~XMLDocumentConstructor() {
		yaml_parser_delete(&yamlParser);
	}
	
	void setOption(const std::string& uri, const std::string& value) {
	}

	void process() {
		current = parser->get_document()->create_root_node("yaml");
		mode.push_back(Mode::ROOT);
		std::string itemName;

		while (true) {
			yaml_event_t event;

			if (!yaml_parser_parse(&yamlParser, &event)) {
				// FIXME: throw exception
				return;
			}


			if (event.type == YAML_STREAM_END_EVENT) {
				yaml_event_delete(&event);
				break;
			} else if (event.type == YAML_STREAM_START_EVENT) {
			} else if (event.type == YAML_NO_EVENT) {
				current->add_child("null"); // TODO: null?
			} else if (event.type == YAML_DOCUMENT_START_EVENT) {
			} else if (event.type == YAML_DOCUMENT_END_EVENT) {
			} else if (event.type == YAML_ALIAS_EVENT) {
				// TODO: alias?
			} else if (event.type == YAML_SCALAR_EVENT) {
				if (mode.back() == Mode::SEQUENCE) {
					current->add_child(itemName)->add_child_text(y2x(event.data.scalar.value));
				} else if (mode.back() == Mode::MAPPING) {
					current = current->add_child(y2xname(event.data.scalar.value));
					mode.push_back(Mode::MAP_KEY);
				} else if (mode.back() == Mode::MAP_KEY) {
					current->add_child_text(y2x(event.data.scalar.value));
					current = parentOrSelf(current);
					mode.pop_back();
				} else if (mode.back() == Mode::ROOT) {
					current->add_child_text(y2x(event.data.scalar.value));
				} else {
					// TODO: process YAML_SCALAR_EVENT
				}

			} else if (event.type == YAML_SEQUENCE_START_EVENT) {
				xmlpp::Element* parent = current->get_parent();
				if (parent) {
					itemName = current->get_name();
					parent->remove_child(current);
					current = parent;
				} else {
					itemName = "item";
				}
				if (mode.back() == Mode::MAP_KEY) mode.pop_back();
				mode.push_back(Mode::SEQUENCE);
			} else if (event.type == YAML_SEQUENCE_END_EVENT) {
				mode.pop_back(); // TODO: assert sequence?
			} else if (event.type == YAML_MAPPING_START_EVENT) {

				if (mode.back() == Mode::ROOT) {
				} else if (mode.back() == Mode::SEQUENCE) {
					current = current->add_child(itemName);
				} else if (mode.back() == Mode::MAP_KEY) {
					mode.pop_back();
				} else {
					// TODO: map might be a key of another map → wrap/nest
				}

				mode.push_back(Mode::MAPPING);
			} else if (event.type == YAML_MAPPING_END_EVENT) {
				current = parentOrSelf(current);
				mode.pop_back(); // TODO: assert map?
			} else {
				// TODO: unsupported type?
			}

			yaml_event_delete(&event);
		}


	}
};

}
}
}