src/XMLDocumentConstructor.h
author František Kučera <franta-hg@frantovo.cz>
Sun, 06 Jun 2021 11:43:18 +0200
branch v_0
changeset 36 c97810ccfdef
parent 35 5bfd1d389d5f
child 37 cae9eedf4180
permissions -rw-r--r--
indefinite byte and char strings

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <codecvt>
#include <vector>
#include <sstream>
#include <iomanip>

#include <libxml++-2.6/libxml++/libxml++.h>
#include <cbor.h>

#include <relpipe/writer/RelpipeWriterException.h>

#include "XMLNameCodec.h"

namespace relpipe {
namespace in {
namespace xmltable {

/**
 * Reads a CBOR stream and builds an XML DOM document from it.
 *
 * The whole input is decoded by the libcbor streaming decoder (cbor_stream_decode)
 * and each decoder callback is translated to an operation on the XML tree
 * of the document held by the given xmlpp::DomParser.
 *
 * The state of the conversion is kept in two stacks: „mode“ (what kind of structure
 * is currently being filled) and „remainingItems“ (how many items of the open containers
 * are yet to come).
 */
class XMLDocumentConstructor {
private:
	std::istream* input = nullptr;
	xmlpp::DomParser* parser = nullptr;
	XMLNameCodec nameCodec;

	/**
	 * Kind of the structure that is currently being processed (top of the „mode“ stack).
	 */
	enum class Mode {
		/** pushed by process() before the decoding starts */
		ROOT,
		/** pushed by arrayStart(); scalar values become child elements named by „itemName“ */
		ARRAY,
		/** pushed by mapStart(); the next scalar value is used as a name of a new element */
		MAP_KEY,
		/** pushed after a map key; the next scalar value becomes the text of the key element */
		MAP_VALUE,
		/** indefinite-length character string; chunks are collected in „currentIndefiniteString“ */
		CHAR_STRING,
		/** indefinite-length byte string; chunks are collected in „currentIndefiniteString“ */
		BYTE_STRING
	};


	/** name of the root element; can be changed through setOption("root-name", …) */
	Glib::ustring rootName = "cbor";
	/** name of the elements created for the items of the current array; set in arrayStart() */
	Glib::ustring itemName;
	/** element that currently receives new children or text; set in process() */
	xmlpp::Element* current = nullptr;
	std::vector<Mode> mode;
	/**
	 * number of remaining items in the fixed-size container (map or array) at current tree-level
	 * or INDEFINITE for the containers and strings that are terminated by the CBOR „break“
	 */
	std::vector<ssize_t> remainingItems;
#define INDEFINITE -1

	/** collects the chunks of an indefinite-length string until the „break“ arrives */
	std::stringstream currentIndefiniteString;

	cbor_callbacks callbacks = cbor_empty_callbacks;

	/**
	 * Both CBOR and XML strings are in UTF-8.
	 *
	 * @param value pointer to the raw bytes, might be null
	 * @param length number of bytes
	 * @return copy of the bytes or an empty string if there is no data
	 */
	static std::string c2x(cbor_data value, uint64_t length) {
		return value && length > 0 ? std::string(reinterpret_cast<const char*> (value), length) : "";
	}

	/**
	 * @return parent of the given element or the element itself if it has no parent
	 */
	xmlpp::Element* parentOrSelf(xmlpp::Element* current) {
		return current->get_parent() == nullptr ? current : current->get_parent();
	}

	/**
	 * Counts down the items of the fixed-size container on the top of the stack
	 * and closes the container when the last item has been consumed.
	 * Does nothing for the INDEFINITE containers (they are closed by the „break“ callback).
	 */
	void checkRemainingItems() {
		// The mode stack may still say ARRAY/MAP_KEY while no counter is left (e.g. at the end of process())
		// – reading back() of an empty vector would be an undefined behavior.
		if (remainingItems.empty()) return;

		if (mode.back() == Mode::ARRAY || mode.back() == Mode::MAP_KEY) {
			if (remainingItems.back() != INDEFINITE) {
				remainingItems.back()--;
				if (remainingItems.back() < 1) containerEnd();
			}
		}
	}

	/**
	 * Puts a scalar value into the XML tree (or into the buffer of an indefinite string) according to the current mode.
	 *
	 * @param value textual representation of the value
	 * @param cborType name of the CBOR type; stored in the „value-type“ or „key-type“ attribute
	 * @param isNull currently not used (see the TODO below)
	 */
	void appendScalarValue(Glib::ustring value, Glib::ustring cborType, bool isNull = false) {
		// TODO: null
		if (mode.back() == Mode::ARRAY) {
			xmlpp::Element* element = current->add_child(nameCodec.encode(itemName));
			element->add_child_text(value);
			element->set_attribute("value-type", cborType);
		} else if (mode.back() == Mode::MAP_KEY) {
			// the key becomes a name of a new element; its value is expected next
			current = current->add_child(nameCodec.encode(value));
			current->set_attribute("key-type", cborType);
			mode.push_back(Mode::MAP_VALUE);
		} else if (mode.back() == Mode::MAP_VALUE) {
			// the value becomes a text of the key element; then we return to the map element
			current->add_child_text(value);
			current->set_attribute("value-type", cborType);
			current = parentOrSelf(current);
			mode.pop_back();
		} else if (mode.back() == Mode::ROOT) {
			current->add_child_text(value);
		} else if (mode.back() == Mode::BYTE_STRING || mode.back() == Mode::CHAR_STRING) {
			// chunk of an indefinite string: just collect it, it will be flushed on „break“
			currentIndefiniteString << value;
		} else {
			// TODO: process YAML_SCALAR_EVENT
		}

		checkRemainingItems();
	}

	/**
	 * Starts an array.
	 * If the current element has a parent, the current element is removed from the tree
	 * and its name is used for the items of the array (which are then added to the parent).
	 * Otherwise the items are called „item“.
	 *
	 * @param size number of items or INDEFINITE
	 */
	void arrayStart(ssize_t size) {
		xmlpp::Element* parent = current->get_parent();
		if (parent) {
			itemName = current->get_name();
			parent->remove_child(current);
			current = parent;
		} else {
			itemName = "item";
		}

		if (mode.back() == Mode::MAP_KEY) mode.pop_back();
		mode.push_back(Mode::ARRAY);
		remainingItems.push_back(size);
	}

	/**
	 * Starts a map. The next scalar value will be interpreted as a key.
	 *
	 * @param size number of key-value pairs or INDEFINITE
	 */
	void mapStart(ssize_t size) {
		if (mode.back() == Mode::ROOT) {
			// the root element itself represents the top-level map
		} else if (mode.back() == Mode::ARRAY) {
			current = current->add_child(nameCodec.encode(itemName));
			current->set_attribute("value-type", "map");
		} else if (mode.back() == Mode::MAP_VALUE) {
			mode.pop_back();
			// TODO: remainingItems
		} else {
			// TODO: map might be a key of another map → wrap/nest
			// …probably not
		}

		mode.push_back(Mode::MAP_KEY);
		remainingItems.push_back(size);
	}

	/**
	 * Finishes an indefinite-length string: leaves the string mode, emits the collected chunks
	 * as a single scalar value and clears the buffer.
	 *
	 * @param cborType name of the CBOR type passed to appendScalarValue()
	 */
	void flushCurrentIndefiniteString(Glib::ustring cborType) {
		mode.pop_back();
		appendScalarValue(currentIndefiniteString.str(), cborType);
		currentIndefiniteString = std::stringstream();
	}

	/**
	 * Closes the container or the indefinite string on the top of the stack.
	 * Called when the last item of a fixed-size container was consumed or when the CBOR „break“ arrives.
	 *
	 * @throws relpipe::writer::RelpipeWriterException if there is nothing to close (e.g. a stray „break“ in the input)
	 */
	void containerEnd() {
		// pop_back() on an empty vector would be an undefined behavior
		if (remainingItems.empty()) throw relpipe::writer::RelpipeWriterException(L"Unexpected end of a CBOR container: there is no open container or string.");

		remainingItems.pop_back();
		if (mode.back() == Mode::MAP_KEY || mode.back() == Mode::MAP_VALUE) current = parentOrSelf(current);
		else if (mode.back() == Mode::BYTE_STRING) flushCurrentIndefiniteString("byte-string");
		else if (mode.back() == Mode::CHAR_STRING) flushCurrentIndefiniteString("string");
		else mode.pop_back(); // TODO: assert map/array
	}

public:

	/**
	 * Registers the libcbor callbacks; nothing is read until process() is called.
	 *
	 * @param input stream with the CBOR data
	 * @param parser parser whose document will be filled with the XML tree
	 */
	XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) {

		// Each callback receives „this“ as its context (see process()) and delegates to the instance methods.
#define CBOR_CALLBACK_START if (XMLDocumentConstructor* instance = static_cast<XMLDocumentConstructor*> (context)) {
#define CBOR_CALLBACK_END } else throw relpipe::writer::RelpipeWriterException(L"Invalid context in the CBOR callback.");
#define NUMERIC_CALLBACK(type, name) [](void* context, type value) { CBOR_CALLBACK_START instance->appendScalarValue(std::to_string(value), name); CBOR_CALLBACK_END }

		callbacks.array_start = [](void* context, size_t size) {
			CBOR_CALLBACK_START
			instance->arrayStart(size);
			CBOR_CALLBACK_END
		};

		callbacks.boolean = [](void* context, bool value) {
			CBOR_CALLBACK_START
			instance->appendScalarValue(value ? "true" : "false", "boolean");
			CBOR_CALLBACK_END
		};

		// Bytes are written as lower-case hexadecimal digits, two per byte.
		callbacks.byte_string = [](void* context, cbor_data value, uint64_t size) {
			CBOR_CALLBACK_START
			std::stringstream hex;
			hex << std::hex << std::setfill('0');
			// the width is reset after each formatted output, so it must be set for every single byte
			for (uint64_t i = 0; i < size; i++) hex << std::setw(2) << static_cast<int> (value[i]);
			instance->appendScalarValue(hex.str(), "byte-string");
			CBOR_CALLBACK_END
		};

		callbacks.byte_string_start = [](void* context) {
			CBOR_CALLBACK_START
			instance->mode.push_back(Mode::BYTE_STRING);
			instance->remainingItems.push_back(INDEFINITE);
			CBOR_CALLBACK_END
		};

		callbacks.float2 = NUMERIC_CALLBACK(float, "float2");
		callbacks.float4 = NUMERIC_CALLBACK(float, "float4");
		callbacks.float8 = NUMERIC_CALLBACK(double, "float8");

		callbacks.indef_array_start = [](void* context) {
			CBOR_CALLBACK_START
			instance->arrayStart(INDEFINITE);
			CBOR_CALLBACK_END
		};

		callbacks.indef_map_start = [](void* context) {
			CBOR_CALLBACK_START
			instance->mapStart(INDEFINITE);
			CBOR_CALLBACK_END
		};

		callbacks.indef_break = [](void* context) {
			CBOR_CALLBACK_START
			instance->containerEnd();
			CBOR_CALLBACK_END
		};

		callbacks.map_start = [](void* context, size_t size) {
			CBOR_CALLBACK_START
			instance->mapStart(size);
			CBOR_CALLBACK_END
		};

		// NOTE(review): the argument of the callback is printed as-is;
		// presumably the actual CBOR value is (−1 − argument) – confirm against the libcbor documentation.
		callbacks.negint8 = NUMERIC_CALLBACK(uint8_t, "negative-int8");
		callbacks.negint16 = NUMERIC_CALLBACK(uint16_t, "negative-int16");
		callbacks.negint32 = NUMERIC_CALLBACK(uint32_t, "negative-int32");
		callbacks.negint64 = NUMERIC_CALLBACK(uint64_t, "negative-int64");

		callbacks.null = [](void* context) {
			CBOR_CALLBACK_START
			instance->appendScalarValue("", "null", true);
			CBOR_CALLBACK_END
		};

		callbacks.string = [](void* context, cbor_data value, uint64_t size) {
			CBOR_CALLBACK_START
			instance->appendScalarValue(c2x(value, size), "string");
			CBOR_CALLBACK_END
		};

		callbacks.string_start = [](void* context) {
			CBOR_CALLBACK_START
			instance->mode.push_back(Mode::CHAR_STRING);
			instance->remainingItems.push_back(INDEFINITE);
			CBOR_CALLBACK_END
		};

		callbacks.tag = [](void* context, uint64_t value) {
			CBOR_CALLBACK_START
			// TODO: implement
			xmlpp::Element* element = instance->current->add_child("tag");
			element->add_child_text(std::to_string(value));
			CBOR_CALLBACK_END
		};

		callbacks.uint8 = NUMERIC_CALLBACK(uint8_t, "uint8");
		callbacks.uint16 = NUMERIC_CALLBACK(uint16_t, "uint16");
		callbacks.uint32 = NUMERIC_CALLBACK(uint32_t, "uint32");
		callbacks.uint64 = NUMERIC_CALLBACK(uint64_t, "uint64");

		callbacks.undefined = [](void* context) {
			CBOR_CALLBACK_START
			instance->appendScalarValue("undefined", "undefined", true); // FIXME: throw exception?
			CBOR_CALLBACK_END
		};

	}

	virtual ~XMLDocumentConstructor() {
	}

	/**
	 * Configures the conversion.
	 *
	 * @param uri name of the option; only „root-name“ is supported
	 * @param value value of the option
	 * @throws std::invalid_argument if the option is not supported
	 */
	void setOption(const std::string& uri, const std::string& value) {
		if (uri == "root-name") rootName = value;
		else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
	}

	/**
	 * Reads the whole input, decodes it and fills the document of the parser.
	 * The number of processed bytes is stored in the „bytes-read“ attribute of the root element.
	 *
	 * @throws relpipe::writer::RelpipeWriterException if the decoder reports other status than CBOR_DECODER_FINISHED
	 */
	void process() {
		current = parser->get_document()->create_root_node(rootName);
		mode.push_back(Mode::ROOT);

		// TODO: better streaming/buffering? (however, we still have to hold whole document in memory and „infinite“ stream processing is impossible)
		std::stringstream bufferStream;
		for (char ch = input->get(); input->good(); ch = input->get()) bufferStream.put(ch);
		std::string buffer = bufferStream.str();

		// Each call of the decoder consumes one CBOR item (and invokes one callback); we continue until the buffer is exhausted.
		size_t bytesRead = 0;
		size_t length = buffer.size();
		for (cbor_decoder_result result; bytesRead < length; bytesRead += result.read) {
			result = cbor_stream_decode(reinterpret_cast<cbor_data> (buffer.c_str()) + bytesRead, length - bytesRead, &callbacks, this);
			if (result.status != cbor_decoder_status::CBOR_DECODER_FINISHED) throw relpipe::writer::RelpipeWriterException(L"CBOR parsing failed: status = " + std::to_wstring(result.status));
		}

		checkRemainingItems();

		parser->get_document()->get_root_node()->set_attribute("bytes-read", std::to_string(bytesRead));
	}
};

}
}
}