Convert YAML to relations.
Expected structure is:
map:
    key = relation name
    value = sequence of maps (records) where:
        key = attribute name
        value = attribute value
Example:
--------
fruit:
  - id: 1
    name: cherry
  - id: 2
    name: apple
  - id: 3
    name: plum
empty:
# this relation has no attributes and will be ignored
sparse:
  - a: aaa
    b: bbb
    c: ccc
  - a: AAA
    b: BBB
  - c: C
    d: D
    e: E
    a: A
    f: F
  - c: CCC
--------
Not yet supported:
--------
my_relation_with_metadata:
  attribute-metadata:
    - name: a
      type: string
    - name: b
      type: boolean
  record:
    - a: a
    - b: true
--------
/**
* Relational pipes
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <codecvt>
#include <cstdlib>
#include <iostream>
#include <locale>
#include <memory>
#include <string>
#include <vector>
#include <yaml.h>
#include <relpipe/writer/RelationalWriter.h>
#include <relpipe/writer/typedefs.h>
namespace relpipe {
namespace in {
namespace yaml {
/**
 * Reads a YAML stream and writes its content as relational data.
 *
 * The root of the document must be a mapping: relation name → sequence of records,
 * where each record is a flat mapping: attribute name → scalar value.
 * The attributes of a relation are declared from the keys of its first record and are all
 * of the STRING type. For every following record, only these attributes are written:
 * missing ones as empty strings, additional ones are dropped.
 */
class YAMLCommand {
private:
    std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // YAML strings are in UTF-8

    /**
     * Owning wrapper around a libyaml event; the event is released in the destructor.
     * Not copyable, because the wrapped event must be deleted exactly once.
     */
    class YAMLEvent {
    private:
        yaml_event_t event;
    public:

        explicit YAMLEvent(yaml_event_t event) : event(event) {
        }

        ~YAMLEvent() {
            yaml_event_delete(&event);
        }

        YAMLEvent(const YAMLEvent&) = delete;
        YAMLEvent& operator=(const YAMLEvent&) = delete;

        /** @return type of the wrapped event */
        yaml_event_type_t getType() const {
            return event.type;
        }

        /** @return the wrapped event; valid only during the lifetime of this object */
        const yaml_event_t* getEvent() const {
            return &event;
        }
    };

    using YAMLEvent_p = std::unique_ptr<YAMLEvent>;

    /**
     * Owning wrapper around a libyaml parser that reads from a std::istream.
     * Not copyable, because libyaml keeps a pointer to this object as the read-handler context.
     */
    class YAMLParser {
    private:
        yaml_parser_t yamlParser;
        std::istream* input = nullptr;

        /**
         * Read handler called by libyaml.
         *
         * @param instance the YAMLParser passed in setInput()
         * @param buffer destination for the bytes read
         * @param size capacity of the buffer
         * @param length receives the number of bytes actually read
         * @return 1 on success (including the end of the stream), 0 on failure
         */
        static int readFromInput(void* instance, unsigned char* buffer, size_t size, size_t* length) {
            std::istream* source = static_cast<YAMLParser*> (instance)->input;
            source->read(reinterpret_cast<char*> (buffer), static_cast<std::streamsize> (size));
            *length = static_cast<size_t> (source->gcount());
            // a short read at the end of the stream sets failbit together with eofbit, which is not an error here
            return (source->good() || source->eof()) ? 1 : 0;
        }

        /** @return description of the last parser error, including its position if libyaml reported a problem */
        std::wstring describeProblem() const {
            std::wstring message = L"Invalid YAML: parser error";
            if (yamlParser.problem) {
                message += L": ";
                // widened byte by byte; presumably libyaml problem texts are plain ASCII
                for (const char* c = yamlParser.problem; *c; ++c) message += static_cast<wchar_t> (static_cast<unsigned char> (*c));
                message += L" (line " + std::to_wstring(yamlParser.problem_mark.line + 1);
                message += L", column " + std::to_wstring(yamlParser.problem_mark.column + 1) + L")";
            }
            return message;
        }

    public:

        /**
         * @throws relpipe::writer::RelpipeWriterException if libyaml is unable to initialize the parser
         */
        YAMLParser() {
            if (yaml_parser_initialize(&yamlParser) != 1) throw relpipe::writer::RelpipeWriterException(L"Unable to initialize the YAML parser"); // 1 = OK in yaml.h
        }

        ~YAMLParser() {
            yaml_parser_delete(&yamlParser);
        }

        YAMLParser(const YAMLParser&) = delete;
        YAMLParser& operator=(const YAMLParser&) = delete;

        /**
         * @param input stream to be parsed; must outlive all next() calls
         * NOTE(review): libyaml presumably allows setting the input only once per parser – confirm before reusing an instance
         */
        void setInput(std::istream* input) {
            this->input = input;
            yaml_parser_set_input(&yamlParser, readFromInput, static_cast<void*> (this));
        }

        /**
         * @return next event or nullptr at the end of the stream
         * @throws relpipe::writer::RelpipeWriterException if the input can not be read or is not a valid YAML
         */
        YAMLEvent_p next() {
            yaml_event_t event;
            // a failure must not be confused with the end of the stream, otherwise a broken input would silently produce truncated output
            if (yaml_parser_parse(&yamlParser, &event) != 1) throw relpipe::writer::RelpipeWriterException(describeProblem()); // 1 = OK in yaml.h
            if (event.type == YAML_NO_EVENT) return nullptr; // YAML_NO_EVENT = end
            return YAMLEvent_p(new YAMLEvent(event));
        }
    };

    YAMLParser parser;

    /** Position in the expected document structure. */
    enum class State {
        START, // before the root mapping
        RELATIONS, // inside the root mapping: relation names
        ATTRIBUTE, // inside a record: attribute names and values
        RECORDS // inside the sequence of records of a relation
    };

    State state = State::START;

    /**
     * @param value zero-terminated UTF-8 string from libyaml, may be null
     * @return the value converted to a wide string; empty string for null
     */
    relpipe::writer::string_t y2s(const yaml_char_t* value) {
        return value ? convertor.from_bytes(reinterpret_cast<const char*> (value)) : L"";
    }

    /**
     * @param record flat list of alternating attribute names and values
     * @param key attribute name to look for
     * @return value of the first attribute with given name or an empty string if there is none
     * @throws relpipe::writer::RelpipeWriterException if the record does not consist of complete name-value pairs
     */
    static relpipe::writer::string_t findValue(const std::vector<relpipe::writer::string_t>& record, const relpipe::writer::string_t& key) {
        if (record.size() % 2) throw relpipe::writer::RelpipeWriterException(L"Invalid count of values in the record vector");
        for (size_t i = 0; i < record.size(); i += 2) {
            if (record[i] == key) return record[i + 1];
        }
        return L"";
    }

public:

    YAMLCommand() = default;

    virtual ~YAMLCommand() = default;

    /**
     * Parses the YAML from the input and writes all relations found in the root mapping.
     * Processing stops at the end of the root mapping of the first document.
     *
     * @param input stream containing the YAML in UTF-8
     * @param writer destination of the relational data
     * @throws relpipe::writer::RelpipeWriterException if the input is not a valid YAML or does not match the expected structure
     */
    void process(std::istream& input, std::shared_ptr<relpipe::writer::RelationalWriter> writer) {
        parser.setInput(&input);

        relpipe::writer::string_t relationName;
        std::vector<relpipe::writer::string_t> record; // alternating names and values of the current record
        std::vector<relpipe::writer::AttributeMetadata> attributesMetadata; // empty = relation not started yet

        state = State::START;

        for (YAMLEvent_p event = parser.next(); event; event = parser.next()) {
            switch (event->getType()) {
                case YAML_NO_EVENT:
                    throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: YAML_NO_EVENT");
                case YAML_STREAM_START_EVENT:
                case YAML_STREAM_END_EVENT:
                case YAML_DOCUMENT_START_EVENT:
                case YAML_DOCUMENT_END_EVENT:
                case YAML_ALIAS_EVENT:
                    break; // ignored
                case YAML_SCALAR_EVENT:
                {
                    relpipe::writer::string_t scalarValue = y2s(event->getEvent()->data.scalar.value);
                    if (state == State::RELATIONS) relationName = scalarValue;
                    else if (state == State::ATTRIBUTE) record.push_back(scalarValue);
                    else throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_SCALAR_EVENT");
                    break;
                }
                case YAML_SEQUENCE_START_EVENT:
                    if (state == State::RELATIONS) state = State::RECORDS;
                    else throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_SEQUENCE_START_EVENT");
                    break;
                case YAML_SEQUENCE_END_EVENT:
                    if (state != State::RECORDS) throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_SEQUENCE_END_EVENT");
                    // relation ends; the next one declares its own attributes
                    state = State::RELATIONS;
                    relationName.clear();
                    attributesMetadata.clear();
                    break;
                case YAML_MAPPING_START_EVENT:
                    if (state == State::START) state = State::RELATIONS;
                    else if (state == State::RECORDS) state = State::ATTRIBUTE;
                    else if (state == State::RELATIONS) throw relpipe::writer::RelpipeWriterException(L"Not yet implemented"); // TODO: there might be also a map in the relation value (not only a sequence) that would contain metadata (i.e. data types + support for relation containing no records)
                    else throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_MAPPING_START_EVENT");
                    break;
                case YAML_MAPPING_END_EVENT:
                    if (state == State::ATTRIBUTE) {
                        // e.g. an ignored alias used as a value leaves a name without its value
                        if (record.size() % 2) throw relpipe::writer::RelpipeWriterException(L"Invalid count of values in the record vector");
                        if (attributesMetadata.empty()) {
                            // first record of the relation: its keys become the attributes
                            for (size_t i = 0; i < record.size(); i += 2) attributesMetadata.push_back(relpipe::writer::AttributeMetadata{record[i], relpipe::writer::TypeId::STRING});
                            writer->startRelation(relationName, attributesMetadata, true);
                        }
                        for (const auto& metadata : attributesMetadata) writer->writeAttribute(findValue(record, metadata.attributeName));
                        record.clear();
                        state = State::RECORDS;
                    } else if (state == State::RELATIONS) {
                        return; // map of the relations ends (root)
                    } else {
                        throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unexpected YAML_MAPPING_END_EVENT");
                    }
                    break;
                default:
                    throw relpipe::writer::RelpipeWriterException(L"Invalid YAML structure: unknown event: " + std::to_wstring(event->getType()));
            }
        }
    }
};
}
}
}