diff -r aec6c1503dbf -r 012d491e219a src/CSVCommand.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/CSVCommand.cpp Wed Sep 23 11:23:12 2020 +0200 @@ -0,0 +1,146 @@ +/** + * Relational pipes + * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "CSVCommand.h" + +using namespace std; +using namespace relpipe::cli; +using namespace relpipe::writer; + +namespace relpipe { +namespace in { +namespace csv { + +bool CSVCommand::readValue(std::istream& input, std::stringstream& currentValue, bool& lastInRecord) { + lastInRecord = false; + char ch; + input.get(ch); + if (ch == '"') { + while (input.get(ch)) { + if (ch == '"') { + input.get(ch); + if (ch == '"') { + currentValue << ch; + } else { + if (ch == '\r') input.get(ch); + if (ch == '\n') lastInRecord = true; + else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)"); + return true; + } + } else { + currentValue << ch; + } + } + } else if (ch == ',') { + return true; + } else if (ch == '\n') { + lastInRecord = true; + return true; + } else if (ch == '\r') { + input.get(ch); + if (ch == '\n') { + lastInRecord = true; + return true; + } else { + throw RelpipeWriterException(L"Crazy carriage stuck during journey"); + } + } else { + for (currentValue << ch; input.get(ch);) { + switch (ch) { + case ',': return true; + case '\r': break; + case '\n': + lastInRecord = true; + return true; + default: currentValue << ch; + } + } + } + return false; +} + +void CSVCommand::processDataStream(std::ostream& output, std::istream& input, const vector& args) { + wstring_convert < codecvt_utf8> convertor; // UTF-8 is required for CSV + std::shared_ptr writer(Factory::create(output)); + vector metadata; + bool headerDone = false; + bool lastInRecord = false; + stringstream currentValue; + + + while (readValue(input, currentValue, lastInRecord) && input.good()) { + if (headerDone) { + writer->writeAttribute(convertor.from_bytes(currentValue.str())); + } else { + AttributeMetadata am; + am.attributeName = convertor.from_bytes(currentValue.str()); + am.typeId = TypeId::STRING; + metadata.push_back(am); + if (lastInRecord) { + + /* + * Usage (simple syntax): + * relpipe-in-csv → default relation name, attribute names on the first line, all types are string + * relpipe-in-csv my_relation → custom relation name + * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data + * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data + */ + + vector firstLine; + if (args.size() == (1 + metadata.size())) { + for (int i = 0; i < metadata.size(); i++) { + firstLine.push_back(metadata[i].attributeName); + metadata[i].attributeName = args[1 + i]; + } + } else if (args.size() == (1 + 2 * metadata.size())) { + for (int i = 0; i < metadata.size(); i++) { + firstLine.push_back(metadata[i].attributeName); + metadata[i].attributeName = args[1 + i * 2]; + metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]); + } + } + + headerDone = true; + writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true); + if (firstLine.size()) { + for (string_t value : firstLine) writer->writeAttribute(value); + } + } + } + + currentValue.str(""); + currentValue.clear(); + } +} + +} +} +} \ No newline at end of file