src/CSVCommand.cpp
branchv_0
changeset 14 012d491e219a
parent 10 1ae185cac1f3
child 15 157bb1d5e08a
equal deleted inserted replaced
13:aec6c1503dbf 14:012d491e219a
       
     1 /**
       
     2  * Relational pipes
       
     3  * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
       
     4  *
       
     5  * This program is free software: you can redistribute it and/or modify
       
     6  * it under the terms of the GNU General Public License as published by
       
     7  * the Free Software Foundation, version 3 of the License.
       
     8  *
       
     9  * This program is distributed in the hope that it will be useful,
       
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
       
    12  * GNU General Public License for more details.
       
    13  *
       
    14  * You should have received a copy of the GNU General Public License
       
    15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
       
    16  */
       
    17 #include <cstdlib>
       
    18 #include <vector>
       
    19 #include <memory>
       
    20 #include <regex>
       
    21 #include <algorithm>
       
    22 #include <unistd.h>
       
    23 
       
    24 #include <relpipe/writer/RelationalWriter.h>
       
    25 #include <relpipe/writer/RelpipeWriterException.h>
       
    26 #include <relpipe/writer/AttributeMetadata.h>
       
    27 #include <relpipe/writer/Factory.h>
       
    28 #include <relpipe/writer/TypeId.h>
       
    29 
       
    30 #include <relpipe/cli/CLI.h>
       
    31 
       
    32 #include "CSVCommand.h"
       
    33 
       
    34 using namespace std;
       
    35 using namespace relpipe::cli;
       
    36 using namespace relpipe::writer;
       
    37 
       
    38 namespace relpipe {
       
    39 namespace in {
       
    40 namespace csv {
       
    41 
       
    42 bool CSVCommand::readValue(std::istream& input, std::stringstream& currentValue, bool& lastInRecord) {
       
    43 	lastInRecord = false;
       
    44 	char ch;
       
    45 	input.get(ch);
       
    46 	if (ch == '"') {
       
    47 		while (input.get(ch)) {
       
    48 			if (ch == '"') {
       
    49 				input.get(ch);
       
    50 				if (ch == '"') {
       
    51 					currentValue << ch;
       
    52 				} else {
       
    53 					if (ch == '\r') input.get(ch);
       
    54 					if (ch == '\n') lastInRecord = true;
       
    55 					else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)");
       
    56 					return true;
       
    57 				}
       
    58 			} else {
       
    59 				currentValue << ch;
       
    60 			}
       
    61 		}
       
    62 	} else if (ch == ',') {
       
    63 		return true;
       
    64 	} else if (ch == '\n') {
       
    65 		lastInRecord = true;
       
    66 		return true;
       
    67 	} else if (ch == '\r') {
       
    68 		input.get(ch);
       
    69 		if (ch == '\n') {
       
    70 			lastInRecord = true;
       
    71 			return true;
       
    72 		} else {
       
    73 			throw RelpipeWriterException(L"Crazy carriage stuck during journey");
       
    74 		}
       
    75 	} else {
       
    76 		for (currentValue << ch; input.get(ch);) {
       
    77 			switch (ch) {
       
    78 				case ',': return true;
       
    79 				case '\r': break;
       
    80 				case '\n':
       
    81 					lastInRecord = true;
       
    82 					return true;
       
    83 				default: currentValue << ch;
       
    84 			}
       
    85 		}
       
    86 	}
       
    87 	return false;
       
    88 }
       
    89 
       
    90 void CSVCommand::processDataStream(std::ostream& output, std::istream& input, const vector<relpipe::writer::string_t>& args) {
       
    91 	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
       
    92 	std::shared_ptr<RelationalWriter> writer(Factory::create(output));
       
    93 	vector<AttributeMetadata> metadata;
       
    94 	bool headerDone = false;
       
    95 	bool lastInRecord = false;
       
    96 	stringstream currentValue;
       
    97 
       
    98 
       
    99 	while (readValue(input, currentValue, lastInRecord) && input.good()) {
       
   100 		if (headerDone) {
       
   101 			writer->writeAttribute(convertor.from_bytes(currentValue.str()));
       
   102 		} else {
       
   103 			AttributeMetadata am;
       
   104 			am.attributeName = convertor.from_bytes(currentValue.str());
       
   105 			am.typeId = TypeId::STRING;
       
   106 			metadata.push_back(am);
       
   107 			if (lastInRecord) {
       
   108 
       
   109 				/*
       
   110 				 * Usage (simple syntax):
       
   111 				 * relpipe-in-csv → default relation name, attribute names on the first line, all types are string
       
   112 				 * relpipe-in-csv my_relation → custom relation name
       
   113 				 * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data
       
   114 				 * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data
       
   115 				 */
       
   116 
       
   117 				vector<string_t> firstLine;
       
   118 				if (args.size() == (1 + metadata.size())) {
       
   119 					for (int i = 0; i < metadata.size(); i++) {
       
   120 						firstLine.push_back(metadata[i].attributeName);
       
   121 						metadata[i].attributeName = args[1 + i];
       
   122 					}
       
   123 				} else if (args.size() == (1 + 2 * metadata.size())) {
       
   124 					for (int i = 0; i < metadata.size(); i++) {
       
   125 						firstLine.push_back(metadata[i].attributeName);
       
   126 						metadata[i].attributeName = args[1 + i * 2];
       
   127 						metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]);
       
   128 					}
       
   129 				}
       
   130 
       
   131 				headerDone = true;
       
   132 				writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true);
       
   133 				if (firstLine.size()) {
       
   134 					for (string_t value : firstLine) writer->writeAttribute(value);
       
   135 				}
       
   136 			}
       
   137 		}
       
   138 
       
   139 		currentValue.str("");
       
   140 		currentValue.clear();
       
   141 	}
       
   142 }
       
   143 
       
   144 }
       
   145 }
       
   146 }