diff -r 0d858e0eedf8 -r 90ae67de2f68 src/CSVCommand.cpp --- a/src/CSVCommand.cpp Sun Apr 18 10:53:28 2021 +0200 +++ b/src/CSVCommand.cpp Sun Apr 18 18:20:09 2021 +0200 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,47 @@ return false; } +/** + * Data types might be encoded in the attribute names: name::type e.g. some_attribute::integer + * + * TODO: share this code through relpipe-lib-infertypes (when available) + */ +void tryParseTypes(vector& metadata, std::shared_ptr writer, Configuration& configuration) { + std::wregex pattern(L"(.*)::(.*)"); + std::wsmatch match; + + if (configuration.readTypes == Configuration::ReadTypes::AUTO || configuration.readTypes == Configuration::ReadTypes::TRUE) { + bool hasTypes = true; + std::vector types; + std::vector names; + for (AttributeMetadata& am : metadata) { + if (std::regex_match(am.attributeName, match, pattern)) { + names.push_back(match[1]); + if (configuration.readTypes == Configuration::ReadTypes::TRUE) { + types.push_back(writer->toTypeId(match[2])); // must be valid type name otherwise exception is thrown + } else { + try { + types.push_back(writer->toTypeId(match[2])); + } catch (...) { + hasTypes = false; // ignore exception and keep original names and default type (string) + } + } + } else { + hasTypes = false; + } + } + + if (hasTypes) { + for (int i = 0, count = metadata.size(); i < count; i++) { + metadata[i].attributeName = names[i]; + metadata[i].typeId = types[i]; + } + } else if (configuration.readTypes == Configuration::ReadTypes::TRUE) { + throw RelpipeWriterException(L"Types were expected in the CSV header, but not found."); + } + } +} + void CSVCommand::process(std::istream& input, std::shared_ptr writer, Configuration& configuration) { wstring_convert < codecvt_utf8> convertor; // UTF-8 is required for CSV vector metadata; @@ -105,12 +147,6 @@ metadata.push_back(am); if (lastInRecord) { - // TODO: allow types on CLI and names from CSV? - // TODO: allow types on the second line of the CSV? - // TODO: allow regex pattern+replacement for extracting name and type from the first line of the CSV? - // TODO: allow attribute filtering, subset, like relpipe-tr-cur? - // TODO: allow skipping lines, like tail -n +2 ? - vector firstLine; if (metadata.size() == configuration.attributes.size()) { @@ -120,7 +156,8 @@ metadata[i].typeId = configuration.attributes[i].type; } } else if (configuration.attributes.size() == 0) { - // first line contains attribute names and type is always string + // first line contains attribute names and maybe also types + tryParseTypes(metadata, writer, configuration); } else { throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")"); }