85 } |
86 } |
86 } |
87 } |
87 return false; |
88 return false; |
88 } |
89 } |
89 |
90 |
|
91 /** |
|
92 * Data types might be encoded in the attribute names: name::type e.g. some_attribute::integer |
|
93 * |
|
94 * TODO: share this code through relpipe-lib-infertypes (when available) |
|
95 */ |
|
96 void tryParseTypes(vector<AttributeMetadata>& metadata, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) { |
|
97 std::wregex pattern(L"(.*)::(.*)"); |
|
98 std::wsmatch match; |
|
99 |
|
100 if (configuration.readTypes == Configuration::ReadTypes::AUTO || configuration.readTypes == Configuration::ReadTypes::TRUE) { |
|
101 bool hasTypes = true; |
|
102 std::vector<TypeId> types; |
|
103 std::vector<string_t> names; |
|
104 for (AttributeMetadata& am : metadata) { |
|
105 if (std::regex_match(am.attributeName, match, pattern)) { |
|
106 names.push_back(match[1]); |
|
107 if (configuration.readTypes == Configuration::ReadTypes::TRUE) { |
|
108 types.push_back(writer->toTypeId(match[2])); // must be valid type name otherwise exception is thrown |
|
109 } else { |
|
110 try { |
|
111 types.push_back(writer->toTypeId(match[2])); |
|
112 } catch (...) { |
|
113 hasTypes = false; // ignore exception and keep original names and default type (string) |
|
114 } |
|
115 } |
|
116 } else { |
|
117 hasTypes = false; |
|
118 } |
|
119 } |
|
120 |
|
121 if (hasTypes) { |
|
122 for (int i = 0, count = metadata.size(); i < count; i++) { |
|
123 metadata[i].attributeName = names[i]; |
|
124 metadata[i].typeId = types[i]; |
|
125 } |
|
126 } else if (configuration.readTypes == Configuration::ReadTypes::TRUE) { |
|
127 throw RelpipeWriterException(L"Types were expected in the CSV header, but not found."); |
|
128 } |
|
129 } |
|
130 } |
|
131 |
90 void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) { |
132 void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) { |
91 wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV |
133 wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV |
92 vector<AttributeMetadata> metadata; |
134 vector<AttributeMetadata> metadata; |
93 bool headerDone = false; |
135 bool headerDone = false; |
94 bool lastInRecord = false; |
136 bool lastInRecord = false; |
103 am.attributeName = convertor.from_bytes(currentValue.str()); |
145 am.attributeName = convertor.from_bytes(currentValue.str()); |
104 am.typeId = TypeId::STRING; |
146 am.typeId = TypeId::STRING; |
105 metadata.push_back(am); |
147 metadata.push_back(am); |
106 if (lastInRecord) { |
148 if (lastInRecord) { |
107 |
149 |
108 // TODO: allow types on CLI and names from CSV? |
|
109 // TODO: allow types on the second line of the CSV? |
|
110 // TODO: allow regex pattern+replacement for extracting name and type from the first line of the CSV? |
|
111 // TODO: allow attribute filtering, subset, like relpipe-tr-cur? |
|
112 // TODO: allow skipping lines, like tail -n +2 ? |
|
113 |
|
114 vector<string_t> firstLine; |
150 vector<string_t> firstLine; |
115 |
151 |
116 if (metadata.size() == configuration.attributes.size()) { |
152 if (metadata.size() == configuration.attributes.size()) { |
117 for (int i = 0; i < metadata.size(); i++) { |
153 for (int i = 0; i < metadata.size(); i++) { |
118 firstLine.push_back(metadata[i].attributeName); |
154 firstLine.push_back(metadata[i].attributeName); |
119 metadata[i].attributeName = configuration.attributes[i].name; |
155 metadata[i].attributeName = configuration.attributes[i].name; |
120 metadata[i].typeId = configuration.attributes[i].type; |
156 metadata[i].typeId = configuration.attributes[i].type; |
121 } |
157 } |
122 } else if (configuration.attributes.size() == 0) { |
158 } else if (configuration.attributes.size() == 0) { |
123 // first line contains attribute names and type is always string |
159 // first line contains attribute names and maybe also types |
|
160 tryParseTypes(metadata, writer, configuration); |
124 } else { |
161 } else { |
125 throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")"); |
162 throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")"); |
126 } |
163 } |
127 |
164 |
128 headerDone = true; |
165 headerDone = true; |