src/CSVCommand.cpp
author František Kučera <franta-hg@frantovo.cz>
Sun, 18 Apr 2021 18:20:09 +0200
branchv_0
changeset 20 90ae67de2f68
parent 16 15ee963675af
child 21 22eb4838e8d0
permissions -rw-r--r--
optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
10
1ae185cac1f3 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#include <cstdlib>
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    18
#include <vector>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <memory>
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    20
#include <locale>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <regex>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <algorithm>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <unistd.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <relpipe/writer/RelationalWriter.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <relpipe/writer/RelpipeWriterException.h>
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    27
#include <relpipe/writer/AttributeMetadata.h>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <relpipe/writer/Factory.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
#include <relpipe/writer/TypeId.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
#include <relpipe/cli/CLI.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    33
#include "CSVCommand.h"
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    34
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
using namespace std;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
using namespace relpipe::cli;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
using namespace relpipe::writer;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    39
namespace relpipe {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    40
namespace in {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    41
namespace csv {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    42
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    43
bool CSVCommand::readValue(std::istream& input, std::stringstream& currentValue, bool& lastInRecord) {
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    44
	lastInRecord = false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    45
	char ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    46
	input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    47
	if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    48
		while (input.get(ch)) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    49
			if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    50
				input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    51
				if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    52
					currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    53
				} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    54
					if (ch == '\r') input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    55
					if (ch == '\n') lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    56
					else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    57
					return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    58
				}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    59
			} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    60
				currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    61
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    62
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    63
	} else if (ch == ',') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    64
		return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    65
	} else if (ch == '\n') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    66
		lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    67
		return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    68
	} else if (ch == '\r') {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    69
		input.get(ch);
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    70
		if (ch == '\n') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    71
			lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    72
			return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    73
		} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    74
			throw RelpipeWriterException(L"Crazy carriage stuck during journey");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    75
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    76
	} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    77
		for (currentValue << ch; input.get(ch);) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    78
			switch (ch) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    79
				case ',': return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    80
				case '\r': break;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    81
				case '\n':
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    82
					lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    83
					return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    84
				default: currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    85
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    86
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    87
	}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    88
	return false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    89
}
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    90
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    91
/**
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    92
 * Data types might be encoded in the attribute names: name::type e.g. some_attribute::integer
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    93
 * 
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    94
 * TODO: share this code through relpipe-lib-infertypes (when available)
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    95
 */
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    96
void tryParseTypes(vector<AttributeMetadata>& metadata, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    97
	std::wregex pattern(L"(.*)::(.*)");
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    98
	std::wsmatch match;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    99
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   100
	if (configuration.readTypes == Configuration::ReadTypes::AUTO || configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   101
		bool hasTypes = true;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   102
		std::vector<TypeId> types;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   103
		std::vector<string_t> names;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   104
		for (AttributeMetadata& am : metadata) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   105
			if (std::regex_match(am.attributeName, match, pattern)) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   106
				names.push_back(match[1]);
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   107
				if (configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   108
					types.push_back(writer->toTypeId(match[2])); // must be valid type name otherwise exception is thrown
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   109
				} else {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   110
					try {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   111
						types.push_back(writer->toTypeId(match[2]));
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   112
					} catch (...) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   113
						hasTypes = false; // ignore exception and keep original names and default type (string)
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   114
					}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   115
				}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   116
			} else {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   117
				hasTypes = false;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   118
			}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   119
		}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   120
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   121
		if (hasTypes) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   122
			for (int i = 0, count = metadata.size(); i < count; i++) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   123
				metadata[i].attributeName = names[i];
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   124
				metadata[i].typeId = types[i];
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   125
			}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   126
		} else if (configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   127
			throw RelpipeWriterException(L"Types were expected in the CSV header, but not found.");
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   128
		}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   129
	}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   130
}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   131
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   132
void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   133
	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   134
	vector<AttributeMetadata> metadata;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   135
	bool headerDone = false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   136
	bool lastInRecord = false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   137
	stringstream currentValue;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   138
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   139
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   140
	while (readValue(input, currentValue, lastInRecord) && input.good()) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   141
		if (headerDone) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   142
			writer->writeAttribute(convertor.from_bytes(currentValue.str()));
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   143
		} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   144
			AttributeMetadata am;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   145
			am.attributeName = convertor.from_bytes(currentValue.str());
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   146
			am.typeId = TypeId::STRING;
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   147
			metadata.push_back(am);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   148
			if (lastInRecord) {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   149
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   150
				vector<string_t> firstLine;
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   151
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   152
				if (metadata.size() == configuration.attributes.size()) {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   153
					for (int i = 0; i < metadata.size(); i++) {
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   154
						firstLine.push_back(metadata[i].attributeName);
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   155
						metadata[i].attributeName = configuration.attributes[i].name;
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   156
						metadata[i].typeId = configuration.attributes[i].type;
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   157
					}
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   158
				} else if (configuration.attributes.size() == 0) {
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   159
					// first line contains attribute names and maybe also types
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   160
					tryParseTypes(metadata, writer, configuration);
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   161
				} else {
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   162
					throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")");
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   163
				}
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   164
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   165
				headerDone = true;
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   166
				writer->startRelation(configuration.relation, metadata, true);
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   167
				if (firstLine.size()) {
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   168
					for (string_t value : firstLine) writer->writeAttribute(value);
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   169
				}
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   170
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   171
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   172
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   173
		currentValue.str("");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   174
		currentValue.clear();
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   175
	}
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   176
}
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   177
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   178
}
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   179
}
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   180
}