src/CSVCommand.cpp
branchv_0
changeset 14 012d491e219a
parent 10 1ae185cac1f3
child 15 157bb1d5e08a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/CSVCommand.cpp	Wed Sep 23 11:23:12 2020 +0200
@@ -0,0 +1,146 @@
+/**
+ * Relational pipes
+ * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <cstdlib>
+#include <vector>
+#include <memory>
+#include <regex>
+#include <algorithm>
+#include <unistd.h>
+
+#include <relpipe/writer/RelationalWriter.h>
+#include <relpipe/writer/RelpipeWriterException.h>
+#include <relpipe/writer/AttributeMetadata.h>
+#include <relpipe/writer/Factory.h>
+#include <relpipe/writer/TypeId.h>
+
+#include <relpipe/cli/CLI.h>
+
+#include "CSVCommand.h"
+
+using namespace std;
+using namespace relpipe::cli;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace csv {
+
+/**
+ * Reads one CSV value (field) from the input and appends its content to currentValue.
+ *
+ * Supported forms:
+ *  - quoted value: enclosed in double quotes; a doubled quote ("") inside stands for one quote character;
+ *    commas and line breaks inside the quotes belong to the value
+ *  - unquoted value: everything up to the next comma or line break; carriage returns are skipped
+ *  - empty value: the comma or line break (\n or \r\n) follows immediately
+ *
+ * A value terminated by the end of the input (file without a trailing line break)
+ * is returned as the last value of the last record.
+ * A comma directly followed by the end of the input does not produce a final empty value.
+ * An unterminated quoted value (end of input inside the quotes) is not returned.
+ *
+ * @param input stream to read the CSV data from
+ * @param currentValue buffer to which the content of the value is appended; the caller is expected to reset it between calls
+ * @param lastInRecord set to true if the value was terminated by a line break or by the end of the input, otherwise false
+ * @return true if a value was read, false if there is no (complete) value left in the input
+ * @throws RelpipeWriterException if a closing quote is not followed by a comma or a line break, or if a carriage return is not followed by a line feed
+ */
+bool CSVCommand::readValue(std::istream& input, std::stringstream& currentValue, bool& lastInRecord) {
+	lastInRecord = false;
+	char ch;
+
+	// Nothing left in the input: ch must not be inspected, because it was not assigned.
+	if (!input.get(ch)) return false;
+
+	if (ch == '"') {
+		while (input.get(ch)) {
+			if (ch != '"') {
+				currentValue << ch;
+				continue;
+			}
+
+			// A quote inside a quoted value: either the first half of an escaped quote or the end of the value.
+			if (!input.get(ch)) {
+				// The closing quote was the very last character of the input.
+				lastInRecord = true;
+				return true;
+			}
+
+			if (ch == '"') {
+				currentValue << ch;
+			} else {
+				// The value is closed: only a separator or a line break may follow.
+				bool delimiterRead = true;
+				if (ch == '\r') delimiterRead = static_cast<bool> (input.get(ch));
+				if (delimiterRead && ch == '\n') lastInRecord = true;
+				else if (!delimiterRead || ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)");
+				return true;
+			}
+		}
+		// End of input inside the quotes: the value is incomplete and is not returned.
+		return false;
+	} else if (ch == ',') {
+		return true;
+	} else if (ch == '\n') {
+		lastInRecord = true;
+		return true;
+	} else if (ch == '\r') {
+		if (input.get(ch) && ch == '\n') {
+			lastInRecord = true;
+			return true;
+		} else {
+			throw RelpipeWriterException(L"Crazy carriage stuck during journey");
+		}
+	} else {
+		currentValue << ch;
+		while (input.get(ch)) {
+			if (ch == ',') return true;
+			if (ch == '\n') {
+				lastInRecord = true;
+				return true;
+			}
+			if (ch != '\r') currentValue << ch; // carriage returns are dropped from unquoted values
+		}
+		// End of input without a trailing line break: the value is complete and closes the last record.
+		lastInRecord = true;
+		return true;
+	}
+}
+
+/**
+ * Reads CSV data from the input and writes them as a single relation to the output.
+ *
+ * Usage (simple syntax):
+ * relpipe-in-csv → default relation name, attribute names on the first line, all types are string
+ * relpipe-in-csv my_relation → custom relation name
+ * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data
+ * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data
+ *
+ * The relation is started once the first line was read completely, because the number
+ * of values on that line decides how the arguments are interpreted.
+ * If the number of arguments matches neither of the forms above, the arguments after the relation name are ignored.
+ * Nothing is written for an empty input.
+ *
+ * @param output stream to which the relational data are written
+ * @param input stream with the CSV data (UTF-8)
+ * @param args relation name optionally followed by attribute names or by pairs of attribute name and type
+ */
+void CSVCommand::processDataStream(std::ostream& output, std::istream& input, const vector<relpipe::writer::string_t>& args) {
+	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
+	std::shared_ptr<RelationalWriter> writer(Factory::create(output));
+	vector<AttributeMetadata> metadata;
+	bool headerDone = false;
+	bool lastInRecord = false;
+	stringstream currentValue;
+
+	// readValue() alone decides when to stop: the stream state is deliberately not tested here,
+	// because the last value of an input without a trailing line break is returned
+	// after the end of the stream was already reached.
+	while (readValue(input, currentValue, lastInRecord)) {
+		if (headerDone) {
+			writer->writeAttribute(convertor.from_bytes(currentValue.str()));
+		} else {
+			AttributeMetadata am;
+			am.attributeName = convertor.from_bytes(currentValue.str());
+			am.typeId = TypeId::STRING;
+			metadata.push_back(am);
+
+			if (lastInRecord) {
+				// The first line is complete. If attribute names (and types) were given as arguments,
+				// the first line contains data and must be written as the first record.
+				vector<string_t> firstLine;
+				if (args.size() == (1 + metadata.size())) {
+					for (size_t i = 0; i < metadata.size(); i++) {
+						firstLine.push_back(metadata[i].attributeName);
+						metadata[i].attributeName = args[1 + i];
+					}
+				} else if (args.size() == (1 + 2 * metadata.size())) {
+					for (size_t i = 0; i < metadata.size(); i++) {
+						firstLine.push_back(metadata[i].attributeName);
+						metadata[i].attributeName = args[1 + i * 2];
+						metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]);
+					}
+				}
+
+				headerDone = true;
+				writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true);
+				for (const string_t& value : firstLine) writer->writeAttribute(value);
+			}
+		}
+
+		// Reset the buffer (content and state flags) for the next value.
+		currentValue.str("");
+		currentValue.clear();
+	}
+}
+
+}
+}
+}
\ No newline at end of file