--- a/src/CSVCommand.cpp Sun Apr 18 10:53:28 2021 +0200
+++ b/src/CSVCommand.cpp Sun Apr 18 18:20:09 2021 +0200
@@ -17,6 +17,7 @@
#include <cstdlib>
#include <vector>
#include <memory>
+#include <locale>
#include <regex>
#include <algorithm>
#include <unistd.h>
@@ -87,6 +88,47 @@
return false;
}
+/**
+ * Extracts data types encoded in the attribute names: name::type e.g. some_attribute::integer
+ * If every name carries a valid type, names and types in the metadata are replaced; otherwise the metadata stay untouched
+ * (ReadTypes::AUTO) or an exception is thrown (ReadTypes::TRUE). Any other ReadTypes value makes this a no-op.
+ *
+ * TODO: share this code through relpipe-lib-infertypes (when available)
+ */
+void tryParseTypes(vector<AttributeMetadata>& metadata, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
+	const bool required = configuration.readTypes == Configuration::ReadTypes::TRUE;
+	if (!required && configuration.readTypes != Configuration::ReadTypes::AUTO) return; // neither AUTO nor TRUE: nothing to do, so the regex is never compiled
+
+	const std::wregex pattern(L"(.*)::(.*)"); // greedy first group: the type is what follows the last "::"
+	std::wsmatch match;
+	bool hasTypes = true;
+	std::vector<TypeId> types;
+	std::vector<string_t> names;
+
+	for (const AttributeMetadata& am : metadata) {
+		if (!std::regex_match(am.attributeName, match, pattern)) {
+			hasTypes = false; // no "::" separator in this attribute name
+			continue;
+		}
+		names.push_back(match[1]);
+		try {
+			types.push_back(writer->toTypeId(match[2]));
+		} catch (...) {
+			if (required) throw; // TRUE: must be valid type name, so the original exception is passed on
+			hasTypes = false; // AUTO: ignore exception and keep original names and default type (string)
+		}
+	}
+
+	if (hasTypes) {
+		for (size_t i = 0; i < metadata.size(); i++) { // names and types hold one entry per attribute here
+			metadata[i].attributeName = names[i];
+			metadata[i].typeId = types[i];
+		}
+	} else if (required) {
+		throw RelpipeWriterException(L"Types were expected in the CSV header, but not found.");
+	}
+}
+
void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
vector<AttributeMetadata> metadata;
@@ -105,12 +147,6 @@
metadata.push_back(am);
if (lastInRecord) {
- // TODO: allow types on CLI and names from CSV?
- // TODO: allow types on the second line of the CSV?
- // TODO: allow regex pattern+replacement for extracting name and type from the first line of the CSV?
- // TODO: allow attribute filtering, subset, like relpipe-tr-cur?
- // TODO: allow skipping lines, like tail -n +2 ?
-
vector<string_t> firstLine;
if (metadata.size() == configuration.attributes.size()) {
@@ -120,7 +156,8 @@
metadata[i].typeId = configuration.attributes[i].type;
}
} else if (configuration.attributes.size() == 0) {
- // first line contains attribute names and type is always string
+ // first line contains attribute names and maybe also types
+ tryParseTypes(metadata, writer, configuration);
} else {
throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")");
}