src/CSVCommand.cpp
author František Kučera <franta-hg@frantovo.cz>
Sat, 13 Nov 2021 11:52:54 +0100
branchv_0
changeset 21 22eb4838e8d0
parent 20 90ae67de2f68
child 22 3f6488171e34
permissions -rw-r--r--
check total number of values and throw exception if it does not match the number of columns
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
10
1ae185cac1f3 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#include <cstdlib>
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    18
#include <vector>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <memory>
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    20
#include <locale>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <regex>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <algorithm>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <unistd.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <relpipe/writer/RelationalWriter.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <relpipe/writer/RelpipeWriterException.h>
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    27
#include <relpipe/writer/AttributeMetadata.h>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <relpipe/writer/Factory.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
#include <relpipe/writer/TypeId.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
#include <relpipe/cli/CLI.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    33
#include "CSVCommand.h"
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    34
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
using namespace std;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
using namespace relpipe::cli;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
using namespace relpipe::writer;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    39
namespace relpipe {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    40
namespace in {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    41
namespace csv {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    42
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    43
bool CSVCommand::readValue(std::istream& input, std::stringstream& currentValue, bool& lastInRecord) {
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    44
	lastInRecord = false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    45
	char ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    46
	input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    47
	if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    48
		while (input.get(ch)) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    49
			if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    50
				input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    51
				if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    52
					currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    53
				} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    54
					if (ch == '\r') input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    55
					if (ch == '\n') lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    56
					else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    57
					return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    58
				}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    59
			} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    60
				currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    61
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    62
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    63
	} else if (ch == ',') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    64
		return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    65
	} else if (ch == '\n') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    66
		lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    67
		return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    68
	} else if (ch == '\r') {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    69
		input.get(ch);
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    70
		if (ch == '\n') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    71
			lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    72
			return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    73
		} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    74
			throw RelpipeWriterException(L"Crazy carriage stuck during journey");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    75
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    76
	} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    77
		for (currentValue << ch; input.get(ch);) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    78
			switch (ch) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    79
				case ',': return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    80
				case '\r': break;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    81
				case '\n':
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    82
					lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    83
					return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    84
				default: currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    85
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    86
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    87
	}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    88
	return false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    89
}
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    90
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    91
/**
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    92
 * Data types might be encoded in the attribute names: name::type e.g. some_attribute::integer
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    93
 * 
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    94
 * TODO: share this code through relpipe-lib-infertypes (when available)
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    95
 */
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    96
void tryParseTypes(vector<AttributeMetadata>& metadata, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    97
	std::wregex pattern(L"(.*)::(.*)");
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    98
	std::wsmatch match;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    99
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   100
	if (configuration.readTypes == Configuration::ReadTypes::AUTO || configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   101
		bool hasTypes = true;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   102
		std::vector<TypeId> types;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   103
		std::vector<string_t> names;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   104
		for (AttributeMetadata& am : metadata) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   105
			if (std::regex_match(am.attributeName, match, pattern)) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   106
				names.push_back(match[1]);
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   107
				if (configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   108
					types.push_back(writer->toTypeId(match[2])); // must be valid type name otherwise exception is thrown
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   109
				} else {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   110
					try {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   111
						types.push_back(writer->toTypeId(match[2]));
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   112
					} catch (...) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   113
						hasTypes = false; // ignore exception and keep original names and default type (string)
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   114
					}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   115
				}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   116
			} else {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   117
				hasTypes = false;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   118
			}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   119
		}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   120
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   121
		if (hasTypes) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   122
			for (int i = 0, count = metadata.size(); i < count; i++) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   123
				metadata[i].attributeName = names[i];
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   124
				metadata[i].typeId = types[i];
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   125
			}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   126
		} else if (configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   127
			throw RelpipeWriterException(L"Types were expected in the CSV header, but not found.");
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   128
		}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   129
	}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   130
}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   131
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   132
void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   133
	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   134
	vector<AttributeMetadata> metadata;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   135
	bool headerDone = false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   136
	bool lastInRecord = false;
21
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   137
	integer_t valueCount = 0;
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   138
	stringstream currentValue;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   139
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   140
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   141
	while (readValue(input, currentValue, lastInRecord) && input.good()) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   142
		if (headerDone) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   143
			writer->writeAttribute(convertor.from_bytes(currentValue.str()));
21
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   144
			valueCount++;
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   145
		} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   146
			AttributeMetadata am;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   147
			am.attributeName = convertor.from_bytes(currentValue.str());
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   148
			am.typeId = TypeId::STRING;
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   149
			metadata.push_back(am);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   150
			if (lastInRecord) {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   151
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   152
				vector<string_t> firstLine;
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   153
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   154
				if (metadata.size() == configuration.attributes.size()) {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   155
					for (int i = 0; i < metadata.size(); i++) {
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   156
						firstLine.push_back(metadata[i].attributeName);
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   157
						metadata[i].attributeName = configuration.attributes[i].name;
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   158
						metadata[i].typeId = configuration.attributes[i].type;
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   159
					}
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   160
				} else if (configuration.attributes.size() == 0) {
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   161
					// first line contains attribute names and maybe also types
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   162
					tryParseTypes(metadata, writer, configuration);
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   163
				} else {
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   164
					throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")");
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   165
				}
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   166
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   167
				headerDone = true;
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   168
				writer->startRelation(configuration.relation, metadata, true);
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   169
				if (firstLine.size()) {
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   170
					for (string_t value : firstLine) writer->writeAttribute(value);
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   171
				}
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   172
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   173
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   174
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   175
		currentValue.str("");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   176
		currentValue.clear();
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   177
	}
21
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   178
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   179
	/**
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   180
	 * RFC 4180:
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   181
	 *  - Each line should contain the same number of fields throughout the file.
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   182
	 *  - The last field in the record must not be followed by a comma.
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   183
	 */
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   184
	if (valueCount % metadata.size()) throw RelpipeWriterException(L"The total number of values " + std::to_wstring(valueCount) + L" does not match the number of declared columns " + std::to_wstring(metadata.size()));
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   185
}
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   186
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   187
}
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   188
}
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   189
}