/**
* Relational pipes
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include <locale>
#include <codecvt>
#include <regex>
#include <cassert>
#include <relpipe/reader/typedefs.h>
#include <relpipe/reader/TypeId.h>
#include <relpipe/reader/RelpipeReaderException.h>
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
#include <relpipe/reader/handlers/AttributeMetadata.h>
namespace relpipe {
namespace out {
namespace recfile {
using namespace relpipe::reader;
/**
 * Relational stream handler that serializes relations to the given output
 * stream in the GNU recutils "recfile" text format.
 *
 * For each relation it writes a "%rec:" header followed by "%type:" lines for
 * attributes that map to a non-default recfile type, then one record per row
 * (one "name: value" field per attribute), and finally a comment with the
 * number of records.
 */
class RecfileHandler : public handlers::RelationalReaderStringHandler {
private:
    std::ostream& output;
    // Recfile output is always written in UTF-8.
    std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor;
    std::vector<TypeId> attributeTypes;
    // Stored for each relation, but not read anywhere in this class.
    std::vector<string_t> attributeTypeCodes;
    std::vector<string_t> attributeNames;
    // Number of attribute values received for the current relation.
    integer_t valueCount = 0;
    // Number of attributes (columns) of the current relation.
    integer_t attributeCount = 0;
    // Number of relations started so far.
    integer_t relationCount = 0;

    /**
     * Writes the "%rec:" header line of a relation.
     *
     * @param name relation name, written as-is (converted to UTF-8)
     */
    void writeRelationName(const string_t& name) {
        // FIXME: escaping/filtering – the relation name is written without any
        // escaping, so a name containing e.g. spaces or line breaks will not
        // produce a valid record descriptor.
        output << "%rec: " << convertor.to_bytes(name) << std::endl;
    }

    /**
     * Maps a relpipe type to the name of the corresponding recfile type.
     *
     * @param type relpipe attribute type
     * @return recfile type name; empty string if no "%type:" declaration
     *         should be written (STRING)
     * @throws RelpipeReaderException if the type is none of BOOLEAN, INTEGER, STRING
     */
    static std::string toRecfileType(const TypeId& type) {
        switch (type) {
            case TypeId::BOOLEAN: return "bool";
            case TypeId::INTEGER: return "int";
            case TypeId::STRING: return "";
            default: throw RelpipeReaderException(L"Unsupported type – unable to convert to a Recfile type");
        }
    }

    /**
     * Writes the "%type:" declaration of an attribute, if its type needs one.
     *
     * The field name is escaped in the same way as in the records themselves,
     * so that the declaration refers to the field that is actually written.
     *
     * @param attribute metadata of the attribute
     * @throws RelpipeReaderException if the attribute type is not supported
     */
    void writeAttributeMetadata(const handlers::AttributeMetadata& attribute) {
        const std::string recfileType = toRecfileType(attribute.getTypeId());
        if (recfileType.empty()) return;
        output << "%type: ";
        writeEscapedName(attribute.getAttributeName());
        output << " " << recfileType << std::endl;
    }

    /** Writes an empty line (separates records and relations). */
    void writeSeparator() {
        output << std::endl;
    }

    /** @return true if ch lies in the closed interval [start, end] */
    static bool between(wchar_t ch, wchar_t start, wchar_t end) {
        return ch >= start && ch <= end;
    }

    /**
     * Tells whether a character may be written unescaped in a field name.
     *
     * @param ch the character to test
     * @param first whether the character is the first one of the name
     * @return true for ASCII letters; for non-first characters also for
     *         ASCII digits and '_'
     */
    static bool isValidNameCharacter(wchar_t ch, bool first) {
        if (first) {
            // also '%' is technically valid here, but it is used for special
            // purposes like the relation name or attribute types
            return between(ch, L'a', L'z')
                    || between(ch, L'A', L'Z');
        } else {
            return ch == L'_'
                    || between(ch, L'a', L'z')
                    || between(ch, L'A', L'Z')
                    || between(ch, L'0', L'9');
        }
    }

    /**
     * Writes an attribute name escaped to a recfile field name:
     * '_' is doubled, characters that are not allowed are written as
     * "_<decimal character code>_" and an 'x' is prepended if the first
     * character is not an ASCII letter.
     *
     * @param name attribute name to be escaped and written
     */
    void writeEscapedName(const string_t& name) {
        // TODO: multiple escaping modes - including one that is not lossless
        // but allows writing a single '_' inside the name
        for (size_t i = 0, limit = name.size(); i < limit; i++) {
            const wchar_t ch = name[i];
            const bool valid = isValidNameCharacter(ch, i == 0);
            // Not a lossless round-trip
            // (maybe we could sacrifice some reserved prefix):
            if (i == 0 && !valid) output << 'x';
            if (ch == L'_') output << "__";
            else if (valid) output << convertor.to_bytes(ch);
            else output << '_' << static_cast<uint32_t>(ch) << '_';
        }
    }

    /**
     * Writes a single field of a record: "name: value".
     *
     * @param name attribute name (written escaped)
     * @param type attribute type (currently not used)
     * @param value attribute value; every line break inside it is followed by
     *        "+ " so that the value continues on the next line of the field
     */
    void writeAttribute(const string_t& name, const TypeId& type, const string_t& value) {
        (void) type; // not needed for the output yet
        writeEscapedName(name);
        output << ": ";
        for (char ch : convertor.to_bytes(value)) {
            output << ch;
            if (ch == '\n') output << "+ ";
        }
        output << std::endl;
    }

    /**
     * Writes a comment with the number of records of the current relation.
     * Nothing is written if the relation has no attributes (this also covers
     * the state before the first relation).
     */
    void writeRecordCount() {
        if (attributeCount) {
            output << std::endl << "# Record count: " << (valueCount / attributeCount) << std::endl;
        }
    }

public:

    /**
     * @param output stream to which the recfile is written; only a reference
     *        is stored, so the stream must outlive this handler
     */
    RecfileHandler(std::ostream& output) : output(output) {
    }

    /**
     * Finishes the previous relation (if any) and writes the header of a new one.
     *
     * @param name relation name
     * @param attributes metadata of the attributes of the relation
     * @throws RelpipeReaderException if an attribute has an unsupported type
     */
    void startRelation(string_t name, std::vector<handlers::AttributeMetadata> attributes) override {
        // The record count belongs to the previous relation,
        // so it must be written before the counters are reset.
        writeRecordCount();
        valueCount = 0;
        attributeCount = 0;

        if (relationCount) writeSeparator();
        relationCount++;
        writeRelationName(name);

        attributeCount = attributes.size();
        attributeTypes.resize(attributeCount);
        attributeTypeCodes.resize(attributeCount);
        attributeNames.resize(attributeCount);
        for (size_t i = 0; i < attributes.size(); i++) {
            attributeNames[i] = attributes[i].getAttributeName();
            attributeTypes[i] = attributes[i].getTypeId();
            attributeTypeCodes[i] = attributes[i].getTypeName();
            writeAttributeMetadata(attributes[i]);
        }
    }

    /**
     * Writes one attribute value as a field of the current record.
     * An empty line is written before the first field of each record.
     *
     * @param value attribute value
     * @throws RelpipeReaderException if the current relation has no attributes
     *         (or no relation was started yet)
     */
    void attribute(const string_t& value) override {
        // Without this check, the modulo below would divide by zero.
        if (attributeCount == 0) throw RelpipeReaderException(L"Unexpected attribute value: the relation has no attributes");
        const integer_t i = valueCount % attributeCount;
        if (i == 0) writeSeparator();
        valueCount++;
        writeAttribute(attributeNames[i], attributeTypes[i], value);
    }

    /**
     * Finishes the last relation: writes its record count and,
     * if it contained any values, a trailing empty line.
     */
    // NOTE(review): presumably overrides endOfPipe() of the base handler –
    // confirm against RelationalReaderStringHandler and mark it override.
    void endOfPipe() {
        writeRecordCount();
        if (valueCount) writeSeparator();
    }

};
}
}
}