src/relpipe-in-csv.cpp
branchv_0
changeset 14 012d491e219a
parent 10 1ae185cac1f3
child 15 157bb1d5e08a
equal deleted inserted replaced
13:aec6c1503dbf 14:012d491e219a
    27 #include <relpipe/writer/Factory.h>
    27 #include <relpipe/writer/Factory.h>
    28 #include <relpipe/writer/TypeId.h>
    28 #include <relpipe/writer/TypeId.h>
    29 
    29 
    30 #include <relpipe/cli/CLI.h>
    30 #include <relpipe/cli/CLI.h>
    31 
    31 
       
    32 #include "CSVCommand.h"
       
    33 
    32 using namespace std;
    34 using namespace std;
    33 using namespace relpipe::cli;
    35 using namespace relpipe::cli;
    34 using namespace relpipe::writer;
    36 using namespace relpipe::writer;
    35 
    37 using namespace relpipe::in::csv;
    36 bool readValue(istream& input, stringstream& currentValue, bool& lastInRecord) {
       
    37 	lastInRecord = false;
       
    38 	char ch;
       
    39 	input.get(ch);
       
    40 	if (ch == '"') {
       
    41 		while (input.get(ch)) {
       
    42 			if (ch == '"') {
       
    43 				input.get(ch);
       
    44 				if (ch == '"') {
       
    45 					currentValue << ch;
       
    46 				} else {
       
    47 					if (ch == '\r') input.get(ch);
       
    48 					if (ch == '\n') lastInRecord = true;
       
    49 					else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)");
       
    50 					return true;
       
    51 				}
       
    52 			} else {
       
    53 				currentValue << ch;
       
    54 			}
       
    55 		}
       
    56 	} else if (ch == ',') {
       
    57 		return true;
       
    58 	} else if (ch == '\n') {
       
    59 		lastInRecord = true;
       
    60 		return true;
       
    61 	} else if (ch == '\r') {
       
    62 		input.get(ch);
       
    63 		if (ch == '\n') {
       
    64 			lastInRecord = true;
       
    65 			return true;
       
    66 		} else {
       
    67 			throw RelpipeWriterException(L"Crazy carriage stuck during journey");
       
    68 		}
       
    69 	} else {
       
    70 		for (currentValue << ch; input.get(ch);) {
       
    71 			switch (ch) {
       
    72 				case ',': return true;
       
    73 				case '\r': break;
       
    74 				case '\n':
       
    75 					lastInRecord = true;
       
    76 					return true;
       
    77 				default: currentValue << ch;
       
    78 			}
       
    79 		}
       
    80 	}
       
    81 	return false;
       
    82 }
       
    83 
       
    84 void processDataStream(ostream &output, istream& input, const vector<string_t>& args) {
       
    85 	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
       
    86 	std::shared_ptr<RelationalWriter> writer(Factory::create(output));
       
    87 	vector<AttributeMetadata> metadata;
       
    88 	bool headerDone = false;
       
    89 	bool lastInRecord = false;
       
    90 	stringstream currentValue;
       
    91 
       
    92 
       
    93 	while (readValue(input, currentValue, lastInRecord) && input.good()) {
       
    94 		if (headerDone) {
       
    95 			writer->writeAttribute(convertor.from_bytes(currentValue.str()));
       
    96 		} else {
       
    97 			AttributeMetadata am;
       
    98 			am.attributeName = convertor.from_bytes(currentValue.str());
       
    99 			am.typeId = TypeId::STRING;
       
   100 			metadata.push_back(am);
       
   101 			if (lastInRecord) {
       
   102 
       
   103 				/*
       
   104 				 * Usage (simple syntax):
       
   105 				 * relpipe-in-csv → default relation name, attribute names on the first line, all types are string
       
   106 				 * relpipe-in-csv my_relation → custom relation name
       
   107 				 * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data
       
   108 				 * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data
       
   109 				 */
       
   110 
       
   111 				vector<string_t> firstLine;
       
   112 				if (args.size() == (1 + metadata.size())) {
       
   113 					for (int i = 0; i < metadata.size(); i++) {
       
   114 						firstLine.push_back(metadata[i].attributeName);
       
   115 						metadata[i].attributeName = args[1 + i];
       
   116 					}
       
   117 				} else if (args.size() == (1 + 2 * metadata.size())) {
       
   118 					for (int i = 0; i < metadata.size(); i++) {
       
   119 						firstLine.push_back(metadata[i].attributeName);
       
   120 						metadata[i].attributeName = args[1 + i * 2];
       
   121 						metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]);
       
   122 					}
       
   123 				}
       
   124 
       
   125 				headerDone = true;
       
   126 				writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true);
       
   127 				if (firstLine.size()) {
       
   128 					for (string_t value : firstLine) writer->writeAttribute(value);
       
   129 				}
       
   130 			}
       
   131 		}
       
   132 
       
   133 		currentValue.str("");
       
   134 		currentValue.clear();
       
   135 	}
       
   136 }
       
   137 
    38 
   138 int main(int argc, char** argv) {
    39 int main(int argc, char** argv) {
   139 	setlocale(LC_ALL, "");
    40 	setlocale(LC_ALL, "");
   140 	CLI::untieStdIO();
    41 	CLI::untieStdIO();
   141 	CLI cli(argc, argv);
    42 	CLI cli(argc, argv);
   142 
    43 
   143 	int resultCode = CLI::EXIT_CODE_UNEXPECTED_ERROR;
    44 	int resultCode = CLI::EXIT_CODE_UNEXPECTED_ERROR;
   144 
    45 
   145 	try {
    46 	try {
   146 		processDataStream(cout, cin, cli.arguments());
    47 		CSVCommand command;
       
    48 		command.processDataStream(cout, cin, cli.arguments());
   147 		resultCode = CLI::EXIT_CODE_SUCCESS;
    49 		resultCode = CLI::EXIT_CODE_SUCCESS;
   148 	} catch (RelpipeWriterException e) {
    50 	} catch (RelpipeWriterException e) {
   149 		fwprintf(stderr, L"Caught Writer exception: %ls\n", e.getMessge().c_str());
    51 		fwprintf(stderr, L"Caught Writer exception: %ls\n", e.getMessge().c_str());
   150 		fwprintf(stderr, L"Debug: Input stream: eof=%ls, lastRead=%d\n", (cin.eof() ? L"true" : L"false"), cin.gcount());
    52 		fwprintf(stderr, L"Debug: Input stream: eof=%ls, lastRead=%d\n", (cin.eof() ? L"true" : L"false"), cin.gcount());
   151 		resultCode = CLI::EXIT_CODE_DATA_ERROR;
    53 		resultCode = CLI::EXIT_CODE_DATA_ERROR;