/**
* Relational pipes
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <codecvt>
#include <vector>
#include <sstream>
#include <iomanip>
#include <libxml++-2.6/libxml++/libxml++.h>
#include <cbor.h>
#include <relpipe/writer/RelpipeWriterException.h>
#include "XMLNameCodec.h"
namespace relpipe {
namespace in {
namespace xmltable {
class XMLDocumentConstructor {
private:
std::istream* input = nullptr;
xmlpp::DomParser* parser = nullptr;
XMLNameCodec nameCodec;
enum class Mode {
ROOT,
ARRAY,
MAP_KEY,
MAP_VALUE,
CHAR_STRING,
BYTE_STRING
};
Glib::ustring rootName = "cbor";
Glib::ustring itemName;
xmlpp::Element* current;
std::vector<Mode> mode;
/**
* number of remainin items in the fixed-size container (map or array) at current tree-level
*/
std::vector<ssize_t> remainingItems;
#define INDEFINITE -1
#define ATTR_VALUE_TYPE "value-type"
#define ATTR_KEY_TYPE "key-type"
#define ATTR_TAG "tag"
#define ATTR_BYTES_READ "bytes-read"
std::stringstream currentIndefiniteString;
uint64_t currentTag;
bool currentTagPresent = false;
cbor_callbacks callbacks = cbor_empty_callbacks;
/**
* Both CBOR and XML strings are in UTF-8.
*/
static std::string c2x(cbor_data value, uint64_t length) {
return value && length > 0 ? std::string((const char*) value, length) : "";
}
xmlpp::Element* parentOrSelf(xmlpp::Element* current) {
return current->get_parent() == nullptr ? current : current->get_parent();
}
void checkRemainingItems() {
if (mode.back() == Mode::ARRAY || mode.back() == Mode::MAP_KEY) {
if (remainingItems.back() != INDEFINITE) {
remainingItems.back()--;
if (remainingItems.back() < 1) containerEnd();
}
}
}
void writeCurrentTag(xmlpp::Element* element) {
if (currentTagPresent) {
element->set_attribute(ATTR_TAG, std::to_string(currentTag));
currentTagPresent = false;
}
}
void appendScalarValue(Glib::ustring value, Glib::ustring cborType) {
if (mode.back() == Mode::ARRAY) {
xmlpp::Element* element = current->add_child(nameCodec.encode(itemName));
element->add_child_text(value);
element->set_attribute(ATTR_VALUE_TYPE, cborType);
writeCurrentTag(element);
} else if (mode.back() == Mode::MAP_KEY) {
current = current->add_child(nameCodec.encode(value));
current->set_attribute(ATTR_KEY_TYPE, cborType);
writeCurrentTag(current);
mode.push_back(Mode::MAP_VALUE);
} else if (mode.back() == Mode::MAP_VALUE) {
current->add_child_text(value);
current->set_attribute(ATTR_VALUE_TYPE, cborType);
writeCurrentTag(current);
current = parentOrSelf(current);
mode.pop_back();
} else if (mode.back() == Mode::ROOT) {
current->add_child_text(value);
current->set_attribute(ATTR_VALUE_TYPE, cborType);
writeCurrentTag(current);
} else if (mode.back() == Mode::BYTE_STRING || mode.back() == Mode::CHAR_STRING) {
currentIndefiniteString << value;
} else {
// TODO: throw exception?
}
checkRemainingItems();
}
void arrayStart(ssize_t size) {
if (mode.back() == Mode::ROOT) {
current->set_attribute(ATTR_VALUE_TYPE, "array");
itemName = "item";
} else if (mode.back() == Mode::ARRAY) {
current = current->add_child(nameCodec.encode(itemName));
itemName = "item";
} else if (mode.back() == Mode::MAP_VALUE) {
xmlpp::Element* parent = current->get_parent();
itemName = current->get_name();
parent->remove_child(current);
current = parent;
mode.pop_back();
} else {
// Mode::BYTE_STRING and Mode::CHAR_STRING – make no sense
// Mode::MAP_KEY – currently not supported
throw relpipe::writer::RelpipeWriterException(L"Unsupported mode in arrayStart(): " + std::to_wstring((int) mode.back()));
}
mode.push_back(Mode::ARRAY);
remainingItems.push_back(size);
writeCurrentTag(current);
if (size == 0) containerEnd();
}
void mapStart(ssize_t size) {
if (mode.back() == Mode::ROOT) {
} else if (mode.back() == Mode::ARRAY) {
current = current->add_child(nameCodec.encode(itemName));
} else if (mode.back() == Mode::MAP_VALUE) {
mode.pop_back();
// TODO: remainingItems
} else {
// TODO: map might be a key of another map → wrap/nest
// …probably not
}
current->set_attribute(ATTR_VALUE_TYPE, "map");
mode.push_back(Mode::MAP_KEY);
remainingItems.push_back(size);
writeCurrentTag(current);
if (size == 0) containerEnd();
}
void flushCurrentIndefiniteString(Glib::ustring cborType) {
appendScalarValue(currentIndefiniteString.str(), cborType);
currentIndefiniteString = std::stringstream();
}
void containerEnd() {
Mode m = mode.back();
mode.pop_back();
remainingItems.pop_back();
if (m == Mode::MAP_KEY || m == Mode::MAP_VALUE || m == Mode::ARRAY) current = parentOrSelf(current);
else if (m == Mode::BYTE_STRING) flushCurrentIndefiniteString("byte-string");
else if (m == Mode::CHAR_STRING) flushCurrentIndefiniteString("string");
}
public:
XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) {
#define CBOR_CALLBACK_START if (XMLDocumentConstructor* instance = static_cast<XMLDocumentConstructor*> (context)) {
#define CBOR_CALLBACK_END } else throw relpipe::writer::RelpipeWriterException(L"Invalid context in the CBOR callback.");
#define NUMERIC_CALLBACK(type, name) [](void* context, type value) { CBOR_CALLBACK_START instance->appendScalarValue(std::to_string(value), name); CBOR_CALLBACK_END }
callbacks.array_start = [](void* context, size_t size) {
CBOR_CALLBACK_START
instance->arrayStart(size);
CBOR_CALLBACK_END
};
callbacks.boolean = [](void* context, bool value) {
CBOR_CALLBACK_START
instance->appendScalarValue(value ? "true" : "false", "boolean");
CBOR_CALLBACK_END
};
callbacks.byte_string = [](void* context, cbor_data value, uint64_t size) {
CBOR_CALLBACK_START
std::stringstream hex;
hex << std::hex << std::setfill('0') << std::setw(2);
for (uint64_t i = 0; i < size; i++) hex << (int) value[i];
instance->appendScalarValue(hex.str(), "byte-string");
CBOR_CALLBACK_END
};
callbacks.byte_string_start = [](void* context) {
CBOR_CALLBACK_START
instance->mode.push_back(Mode::BYTE_STRING);
instance->remainingItems.push_back(INDEFINITE);
CBOR_CALLBACK_END
};
callbacks.float2 = NUMERIC_CALLBACK(float, "float2");
callbacks.float4 = NUMERIC_CALLBACK(float, "float4");
callbacks.float8 = NUMERIC_CALLBACK(double, "float8");
callbacks.indef_array_start = [](void* context) {
CBOR_CALLBACK_START
instance->arrayStart(INDEFINITE);
CBOR_CALLBACK_END
};
callbacks.indef_map_start = [](void* context) {
CBOR_CALLBACK_START
instance->mapStart(INDEFINITE);
CBOR_CALLBACK_END
};
callbacks.indef_break = [](void* context) {
CBOR_CALLBACK_START
instance->containerEnd();
CBOR_CALLBACK_END
};
callbacks.map_start = [](void* context, size_t size) {
CBOR_CALLBACK_START
instance->mapStart(size);
CBOR_CALLBACK_END
};
callbacks.negint8 = NUMERIC_CALLBACK(uint8_t, "negative-int8");
callbacks.negint16 = NUMERIC_CALLBACK(uint16_t, "negative-int16");
callbacks.negint32 = NUMERIC_CALLBACK(uint32_t, "negative-int32");
callbacks.negint64 = NUMERIC_CALLBACK(uint64_t, "negative-int64");
callbacks.null = [](void* context) {
CBOR_CALLBACK_START
instance->appendScalarValue("", "null");
CBOR_CALLBACK_END
};
callbacks.string = [](void* context, cbor_data value, uint64_t size) {
CBOR_CALLBACK_START
instance->appendScalarValue(c2x(value, size), "string");
CBOR_CALLBACK_END
};
callbacks.string_start = [](void* context) {
CBOR_CALLBACK_START
instance->mode.push_back(Mode::CHAR_STRING);
instance->remainingItems.push_back(INDEFINITE);
CBOR_CALLBACK_END
};
callbacks.tag = [](void* context, uint64_t value) {
CBOR_CALLBACK_START
instance->currentTag = value;
instance->currentTagPresent = true;
CBOR_CALLBACK_END
};
callbacks.uint8 = NUMERIC_CALLBACK(uint8_t, "uint8");
callbacks.uint16 = NUMERIC_CALLBACK(uint16_t, "uint16");
callbacks.uint32 = NUMERIC_CALLBACK(uint32_t, "uint32");
callbacks.uint64 = NUMERIC_CALLBACK(uint64_t, "uint64");
callbacks.undefined = [](void* context) {
CBOR_CALLBACK_START
instance->appendScalarValue("", "undefined"); // TODO: throw exception?
CBOR_CALLBACK_END
};
}
virtual ~XMLDocumentConstructor() {
}
void setOption(const std::string& uri, const std::string& value) {
if (uri == "root-name") rootName = value;
else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
}
void process() {
current = parser->get_document()->create_root_node(rootName);
mode.push_back(Mode::ROOT);
// TODO: better streaming/buffering? (however, we still have to hold whole document in memory and „infinite“ stream processing is impossible)
std::stringstream bufferStream;
for (char ch = input->get(); input->good(); ch = input->get()) bufferStream.put(ch);
std::string buffer = bufferStream.str();
size_t bytesRead = 0;
size_t length = buffer.size();
for (cbor_decoder_result result; bytesRead < length; bytesRead += result.read) {
result = cbor_stream_decode((cbor_data) buffer.c_str() + bytesRead, length - bytesRead, &callbacks, this);
if (result.status != cbor_decoder_status::CBOR_DECODER_FINISHED) throw relpipe::writer::RelpipeWriterException(L"CBOR parsing failed: status = " + std::to_wstring(result.status));
}
checkRemainingItems();
parser->get_document()->get_root_node()->set_attribute(ATTR_BYTES_READ, std::to_string(bytesRead));
}
};
}
}
}