85 } |
85 } |
86 } |
86 } |
87 return false; |
87 return false; |
88 } |
88 } |
89 |
89 |
90 void CSVCommand::process(std::istream& input, const vector<relpipe::writer::string_t>& args, std::shared_ptr<writer::RelationalWriter> writer) { |
90 void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) { |
91 wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV |
91 wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV |
92 vector<AttributeMetadata> metadata; |
92 vector<AttributeMetadata> metadata; |
93 bool headerDone = false; |
93 bool headerDone = false; |
94 bool lastInRecord = false; |
94 bool lastInRecord = false; |
95 stringstream currentValue; |
95 stringstream currentValue; |
103 am.attributeName = convertor.from_bytes(currentValue.str()); |
103 am.attributeName = convertor.from_bytes(currentValue.str()); |
104 am.typeId = TypeId::STRING; |
104 am.typeId = TypeId::STRING; |
105 metadata.push_back(am); |
105 metadata.push_back(am); |
106 if (lastInRecord) { |
106 if (lastInRecord) { |
107 |
107 |
108 /* |
108 // TODO: allow types on CLI and names from CSV? |
109 * Usage (simple syntax): |
109 // TODO: allow types on the second line of the CSV? |
110 * relpipe-in-csv → default relation name, attribute names on the first line, all types are string |
110 // TODO: allow regex pattern+replacement for extracting name and type from the first line of the CSV? |
111 * relpipe-in-csv my_relation → custom relation name |
111 // TODO: allow attribute filtering, subset, like relpipe-tr-cur? |
112 * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data |
112 // TODO: allow skipping lines, like tail -n +2 ? |
113 * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data |
113 |
114 */ |
114 vector<string_t> firstLine; |
115 |
115 |
116 vector<string_t> firstLine; |
116 if (metadata.size() == configuration.attributes.size()) { |
117 if (args.size() == (1 + metadata.size())) { |
|
118 for (int i = 0; i < metadata.size(); i++) { |
117 for (int i = 0; i < metadata.size(); i++) { |
119 firstLine.push_back(metadata[i].attributeName); |
118 firstLine.push_back(metadata[i].attributeName); |
120 metadata[i].attributeName = args[1 + i]; |
119 metadata[i].attributeName = configuration.attributes[i].name; |
|
120 metadata[i].typeId = configuration.attributes[i].type; |
121 } |
121 } |
122 } else if (args.size() == (1 + 2 * metadata.size())) { |
122 } else if (configuration.attributes.size() == 0) { |
123 for (int i = 0; i < metadata.size(); i++) { |
123 // first line contains attribute names and type is always string |
124 firstLine.push_back(metadata[i].attributeName); |
124 } else { |
125 metadata[i].attributeName = args[1 + i * 2]; |
125 throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")"); |
126 metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]); |
|
127 } |
|
128 } |
126 } |
129 |
127 |
130 headerDone = true; |
128 headerDone = true; |
131 writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true); |
129 writer->startRelation(configuration.relation, metadata, true); |
132 if (firstLine.size()) { |
130 if (firstLine.size()) { |
133 for (string_t value : firstLine) writer->writeAttribute(value); |
131 for (string_t value : firstLine) writer->writeAttribute(value); |
134 } |
132 } |
135 } |
133 } |
136 } |
134 } |