src/relpipe-in-csv.cpp
author František Kučera <franta-hg@frantovo.cz>
Thu, 10 Jan 2019 00:10:19 +0100
branch v_0
changeset 3 d7907be4cc40
parent 2 e83895da3e8f
child 10 1ae185cac1f3
permissions -rw-r--r--
allow also custom relation name, attribute names and types (optional)

/**
 * Relational pipes
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include <cstdlib>
#include <vector>
#include <memory>
#include <regex>
#include <algorithm>
#include <unistd.h>

#include <relpipe/writer/RelationalWriter.h>
#include <relpipe/writer/RelpipeWriterException.h>
#include <relpipe/writer/AttributeMetadata.h>
#include <relpipe/writer/Factory.h>
#include <relpipe/writer/TypeId.h>

#include <relpipe/cli/CLI.h>

using namespace std;
using namespace relpipe::cli;
using namespace relpipe::writer;

/**
 * Reads one CSV value (field) from the input and appends its unescaped content to currentValue.
 *
 * Recognized syntax:
 *  - quoted value: starts with a quote character; a doubled quote inside stands for one quote character;
 *    commas and line breaks between the quotes belong to the value
 *  - unquoted value: everything up to the next comma or line feed; carriage returns are dropped
 *  - a value is terminated by a comma, by a line feed or by a carriage return + line feed pair
 *
 * @param input stream to read the CSV data from
 * @param currentValue receives the characters of the value; this function only appends,
 *        so the caller has to reset it between calls
 * @param lastInRecord set to true if the value was terminated by the end of a line, otherwise to false
 * @return true if a value terminated by a comma or by the end of a line was read;
 *         false if the input ended before such terminator was found
 *         (currentValue may then contain an unterminated fragment)
 * @throws RelpipeWriterException if the closing quote is followed by an unexpected character
 *         or if a carriage return at the beginning of a value is not followed by a line feed
 */
bool readValue(istream& input, stringstream& currentValue, bool& lastInRecord) {
	lastInRecord = false;
	char ch;
	// Nothing left to read: return instead of inspecting an uninitialized character.
	if (!input.get(ch)) return false;

	if (ch == '"') {
		while (input.get(ch)) {
			if (ch == '"') {
				// A quote inside a quoted value is either the first half of an escaped quote or the closing quote.
				// The read must be checked: on failure ch would still hold the previous quote
				// and would be mistaken for an escaped one.
				if (!input.get(ch)) return false;
				if (ch == '"') {
					currentValue << ch;
				} else {
					// If this read fails, ch stays '\r' and is reported as an unexpected character below.
					if (ch == '\r') input.get(ch);
					if (ch == '\n') lastInRecord = true;
					else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)");
					return true;
				}
			} else {
				currentValue << ch;
			}
		}
	} else if (ch == ',') {
		// empty value followed by another value
		return true;
	} else if (ch == '\n') {
		// empty value at the end of the record
		lastInRecord = true;
		return true;
	} else if (ch == '\r') {
		// empty value at the end of the record terminated by CR LF;
		// if the read fails, ch stays '\r' and the exception below is thrown
		input.get(ch);
		if (ch == '\n') {
			lastInRecord = true;
			return true;
		} else {
			throw RelpipeWriterException(L"Crazy carriage stuck during journey");
		}
	} else {
		// unquoted value: the first character was already read
		for (currentValue << ch; input.get(ch);) {
			switch (ch) {
				case ',': return true;
				case '\r': break; // leaves only the switch: the carriage return is skipped
				case '\n':
					lastInRecord = true;
					return true;
				default: currentValue << ch;
			}
		}
	}
	// the input ended before the value was terminated
	return false;
}

/**
 * Reads CSV data from the input and writes them as a single relation through a RelationalWriter created for the output.
 *
 * Usage (simple syntax), args being the command line arguments:
 *  - relpipe-in-csv → default relation name ("csv"), attribute names on the first line, all types are string
 *  - relpipe-in-csv my_relation → custom relation name
 *  - relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data
 *  - relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c),
 *    custom types (integer,string,boolean), first line contains data
 *
 * If the number of arguments matches neither of the last two forms for the number of values on the first line,
 * the arguments after the relation name are ignored and the first line is used as the header.
 *
 * NOTE: processing stops as soon as readValue() returns false or the input is no longer good,
 * so a final value that is not terminated by a line break is not written.
 *
 * @param output stream passed to Factory::create() to obtain the writer
 * @param input stream with the CSV data; values are converted from UTF-8 to wide strings
 * @param args relation name and optional attribute names and types (see above)
 */
void processDataStream(ostream &output, istream& input, const vector<string_t>& args) {
	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
	std::shared_ptr<RelationalWriter> writer(Factory::create(output));
	vector<AttributeMetadata> metadata;
	bool headerDone = false;
	bool lastInRecord = false;
	stringstream currentValue;

	while (readValue(input, currentValue, lastInRecord) && input.good()) {
		if (headerDone) {
			writer->writeAttribute(convertor.from_bytes(currentValue.str()));
		} else {
			// Values of the first line are collected as attribute names (string type) until the line ends.
			AttributeMetadata am;
			am.attributeName = convertor.from_bytes(currentValue.str());
			am.typeId = TypeId::STRING;
			metadata.push_back(am);
			if (lastInRecord) {
				const size_t attributeCount = metadata.size();
				// relation name + one name per attribute
				const bool namesOnly = args.size() == (1 + attributeCount);
				// relation name + (name, type) pair per attribute
				const bool namesAndTypes = !namesOnly && args.size() == (1 + 2 * attributeCount);

				// If the names come from the arguments, the first line is ordinary data and must be written as such.
				vector<string_t> firstLine;
				if (namesOnly || namesAndTypes) {
					const size_t step = namesOnly ? 1 : 2;
					firstLine.reserve(attributeCount);
					for (size_t i = 0; i < attributeCount; i++) {
						firstLine.push_back(metadata[i].attributeName);
						metadata[i].attributeName = args[1 + i * step];
						if (namesAndTypes) metadata[i].typeId = writer->toTypeId(args[1 + i * step + 1]);
					}
				}

				headerDone = true;
				writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true);
				for (const string_t& value : firstLine) writer->writeAttribute(value);
			}
		}

		// reset the buffer (content and state flags) for the next value
		currentValue.str("");
		currentValue.clear();
	}
}

/**
 * Entry point: passes the standard input, the standard output and the command line arguments to processDataStream().
 *
 * @return CLI::EXIT_CODE_SUCCESS if the processing finished without an exception,
 *         CLI::EXIT_CODE_DATA_ERROR if a RelpipeWriterException was caught
 */
int main(int argc, char** argv) {
	setlocale(LC_ALL, "");
	CLI::untieStdIO();
	CLI cli(argc, argv);

	int resultCode = CLI::EXIT_CODE_UNEXPECTED_ERROR;

	try {
		processDataStream(cout, cin, cli.arguments());
		resultCode = CLI::EXIT_CODE_SUCCESS;
	} catch (RelpipeWriterException& e) {
		// Caught by reference to avoid copying the exception;
		// non-const because the constness of getMessge() is not visible from this file.
		fwprintf(stderr, L"Caught Writer exception: %ls\n", e.getMessge().c_str());
		// gcount() returns std::streamsize, which does not match %d; cast it to a type with a known format specifier.
		fwprintf(stderr, L"Debug: Input stream: eof=%ls, lastRead=%lld\n", (cin.eof() ? L"true" : L"false"), static_cast<long long>(cin.gcount()));
		resultCode = CLI::EXIT_CODE_DATA_ERROR;
	}

	return resultCode;
}