src/CSVCommand.cpp
branchv_0
changeset 16 15ee963675af
parent 15 157bb1d5e08a
child 20 90ae67de2f68
equal deleted inserted replaced
15:157bb1d5e08a 16:15ee963675af
    85 		}
    85 		}
    86 	}
    86 	}
    87 	return false;
    87 	return false;
    88 }
    88 }
    89 
    89 
    90 void CSVCommand::process(std::istream& input, const vector<relpipe::writer::string_t>& args, std::shared_ptr<writer::RelationalWriter> writer) {
    90 void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
    91 	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
    91 	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
    92 	vector<AttributeMetadata> metadata;
    92 	vector<AttributeMetadata> metadata;
    93 	bool headerDone = false;
    93 	bool headerDone = false;
    94 	bool lastInRecord = false;
    94 	bool lastInRecord = false;
    95 	stringstream currentValue;
    95 	stringstream currentValue;
   103 			am.attributeName = convertor.from_bytes(currentValue.str());
   103 			am.attributeName = convertor.from_bytes(currentValue.str());
   104 			am.typeId = TypeId::STRING;
   104 			am.typeId = TypeId::STRING;
   105 			metadata.push_back(am);
   105 			metadata.push_back(am);
   106 			if (lastInRecord) {
   106 			if (lastInRecord) {
   107 
   107 
   108 				/*
   108 				// TODO: allow types on CLI and names from CSV?
   109 				 * Usage (simple syntax):
   109 				// TODO: allow types on the second line of the CSV?
   110 				 * relpipe-in-csv → default relation name, attribute names on the first line, all types are string
   110 				// TODO: allow regex pattern+replacement for extracting name and type from the first line of the CSV?
   111 				 * relpipe-in-csv my_relation → custom relation name
   111 				// TODO: allow attribute filtering, subset, like relpipe-tr-cur?
   112 				 * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data
   112 				// TODO: allow skipping lines, like tail -n +2 ?
   113 				 * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data
   113 				
   114 				 */
   114 				vector<string_t> firstLine;
   115 
   115 
   116 				vector<string_t> firstLine;
   116 				if (metadata.size() == configuration.attributes.size()) {
   117 				if (args.size() == (1 + metadata.size())) {
       
   118 					for (int i = 0; i < metadata.size(); i++) {
   117 					for (int i = 0; i < metadata.size(); i++) {
   119 						firstLine.push_back(metadata[i].attributeName);
   118 						firstLine.push_back(metadata[i].attributeName);
   120 						metadata[i].attributeName = args[1 + i];
   119 						metadata[i].attributeName = configuration.attributes[i].name;
       
   120 						metadata[i].typeId = configuration.attributes[i].type;
   121 					}
   121 					}
   122 				} else if (args.size() == (1 + 2 * metadata.size())) {
   122 				} else if (configuration.attributes.size() == 0) {
   123 					for (int i = 0; i < metadata.size(); i++) {
   123 					// first line contains attribute names and type is always string
   124 						firstLine.push_back(metadata[i].attributeName);
   124 				} else {
   125 						metadata[i].attributeName = args[1 + i * 2];
   125 					throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")");
   126 						metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]);
       
   127 					}
       
   128 				}
   126 				}
   129 
   127 
   130 				headerDone = true;
   128 				headerDone = true;
   131 				writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true);
   129 				writer->startRelation(configuration.relation, metadata, true);
   132 				if (firstLine.size()) {
   130 				if (firstLine.size()) {
   133 					for (string_t value : firstLine) writer->writeAttribute(value);
   131 					for (string_t value : firstLine) writer->writeAttribute(value);
   134 				}
   132 				}
   135 			}
   133 			}
   136 		}
   134 		}