/**
* Relational pipes
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <wchar.h>

#include <algorithm>
#include <cassert>
#include <codecvt>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <locale>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

#include <relpipe/reader/typedefs.h>
#include <relpipe/reader/TypeId.h>
#include <relpipe/reader/RelpipeReaderException.h>
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
#include <relpipe/reader/handlers/AttributeMetadata.h>
namespace relpipe {
namespace out {
namespace recfile {
using namespace relpipe::reader;
class RecfileHandler : public handlers::RelationalReaderStringHandler {
private:
std::ostream& output;
wstring_convert<codecvt_utf8<wchar_t>> convertor; // XML output will be always in UTF-8
std::vector<TypeId> attributeTypes;
std::vector<string_t> attributeTypeCodes;
std::vector<string_t> attributeNamesIn;
std::vector<string_t> attributeNamesOut;
integer_t valueCount = 0;
integer_t attributeCount = 0;
integer_t relationCount = 0;
void writeRelationName(const string_t& name) {
// FIXME: escaping/filtering
output << "%rec: " << convertor.to_bytes(name) << std::endl;
}
const std::string toRecfileType(const TypeId& type) {
switch (type) {
case TypeId::BOOLEAN: return "bool";
case TypeId::INTEGER: return "int";
case TypeId::STRING: return "";
default: throw RelpipeReaderException(L"Unsupported type – unable to convert to a Recfile type");
}
}
void writeAttributeMetadata(size_t i) {
std::string recfileType = toRecfileType(attributeTypes[i]);
if (recfileType.size()) output << "%type: " << convertor.to_bytes(attributeNamesOut[i]) << " " << recfileType << std::endl;
}
void writeSeparator() {
output << std::endl;
}
bool between(wchar_t ch, wchar_t start, wchar_t end) {
return ch >= start && ch <= end;
}
bool isValidNameCharacter(wchar_t ch, bool first) {
if (first) {
// also '%' is technically valid here, but it is used for special
// purposes like the relation name or attribute types
return /**/between(ch, L'a', L'z')
|| between(ch, L'A', L'Z');
} else {
return ch == L'_'
|| between(ch, L'a', L'z')
|| between(ch, L'A', L'Z')
|| between(ch, L'0', L'9');
}
}
const string_t escapeAttributeName(const string_t& name) {
std::wstringstream escaped;
// TODO: multiple escapting mode - including one that is not lossless
// but allows writing a single '_' inside the name
for (size_t i = 0, limit = name.size(); i < limit; i++) {
wchar_t ch = name[i];
bool valid = isValidNameCharacter(ch, i == 0);
// Not a lossless round-trip
// (maybe we could sacrifice some reserved prefix):
if (i == 0 && !valid) escaped << 'x';
if (ch == '_') escaped << "__";
else if (valid) escaped << ch;
else escaped << '_' << ((uint32_t) ch) << '_';
}
return escaped.str();
}
integer_t computeWidth(const wchar_t ch) {
switch (ch) {
case L'\t':
// TODO: tabulator width?
return 4;
case L' ':
return 1;
default:
return std::max(0, wcwidth(ch));
}
}
/**
* @param stringValue
* @return the width that would the string occupy on the display (particular characters might be wider than 1 column)
*/
integer_t computeWidth(const string_t & stringValue) {
integer_t width = 0;
for (wchar_t ch : stringValue) width += computeWidth(ch);
return width;
}
void writeAttribute(const string_t& escapedName, const TypeId& type, const string_t& value) {
output << convertor.to_bytes(escapedName) << ": ";
static const integer_t MAX_LINE_WIDTH = 80; // TODO: configuration
static const boolean_t unlimited = MAX_LINE_WIDTH == -1;
integer_t currentWidth = computeWidth(escapedName) + 2; // 2 = ": " separator
integer_t valueWidth = computeWidth(value);
integer_t remainingWidth = valueWidth;
for (size_t i = 0, limit = value.size(); i < limit; i++) {
wchar_t ch = value[i];
integer_t characterWidth = computeWidth(ch);
if (ch == '\n') {
output << std::endl << "+ ";
currentWidth = 2; // 2 = "+ " prefix
} else {
if (unlimited || (currentWidth + remainingWidth) <= MAX_LINE_WIDTH) {
// all remaining characters fit the in the limit
currentWidth += characterWidth;
output << convertor.to_bytes(ch);
} else if ((currentWidth + characterWidth + 1) <= MAX_LINE_WIDTH) { // 1 = "\"
// we will wrap the line, but not yet
currentWidth += characterWidth;
output << convertor.to_bytes(ch);
} else {
output << "\\" << std::endl;
currentWidth = 0;
currentWidth += characterWidth;
output << convertor.to_bytes(ch);
}
}
remainingWidth -= characterWidth;
}
output << std::endl;
}
void writeRecordCount() {
if (attributeCount) {
output << std::endl << "# Record count: " << (valueCount / attributeCount) << std::endl;
}
}
public:
RecfileHandler(std::ostream& output) : output(output) {
}
void startRelation(string_t name, std::vector<handlers::AttributeMetadata> attributes) override {
writeRecordCount();
valueCount = 0;
attributeCount = 0;
if (relationCount) writeSeparator();
relationCount++;
writeRelationName(name);
attributeCount = attributes.size();
attributeTypes.resize(attributeCount);
attributeTypeCodes.resize(attributeCount);
attributeNamesIn.resize(attributeCount);
attributeNamesOut.resize(attributeCount);
for (int i = 0; i < attributes.size(); i++) {
attributeNamesIn[i] = attributes[i].getAttributeName();
attributeNamesOut[i] = escapeAttributeName(attributeNamesIn[i]);
attributeTypes[i] = attributes[i].getTypeId();
attributeTypeCodes[i] = attributes[i].getTypeName();
writeAttributeMetadata(i);
}
}
void attribute(const string_t& value) override {
integer_t i = valueCount % attributeCount;
if (i == 0) writeSeparator();
valueCount++;
writeAttribute(attributeNamesOut[i], attributeTypes[i], value);
}
void endOfPipe() {
writeRecordCount();
if (valueCount) writeSeparator();
}
};
}
}
}