src/CSVCommand.cpp
author František Kučera <franta-hg@frantovo.cz>
Sat, 13 Nov 2021 17:05:03 +0100
branchv_0
changeset 22 3f6488171e34
parent 21 22eb4838e8d0
permissions -rw-r--r--
check number of values on each row
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
10
1ae185cac1f3 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#include <cstdlib>
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    18
#include <vector>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <memory>
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    20
#include <locale>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <regex>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <algorithm>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <unistd.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <relpipe/writer/RelationalWriter.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <relpipe/writer/RelpipeWriterException.h>
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    27
#include <relpipe/writer/AttributeMetadata.h>
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <relpipe/writer/Factory.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
#include <relpipe/writer/TypeId.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
#include <relpipe/cli/CLI.h>
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    33
#include "CSVCommand.h"
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    34
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
using namespace std;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
using namespace relpipe::cli;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
using namespace relpipe::writer;
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    39
namespace relpipe {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    40
namespace in {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    41
namespace csv {
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    42
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    43
bool CSVCommand::readValue(std::istream& input, std::stringstream& currentValue, bool& lastInRecord) {
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    44
	lastInRecord = false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    45
	char ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    46
	input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    47
	if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    48
		while (input.get(ch)) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    49
			if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    50
				input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    51
				if (ch == '"') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    52
					currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    53
				} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    54
					if (ch == '\r') input.get(ch);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    55
					if (ch == '\n') lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    56
					else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    57
					return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    58
				}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    59
			} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    60
				currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    61
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    62
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    63
	} else if (ch == ',') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    64
		return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    65
	} else if (ch == '\n') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    66
		lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    67
		return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    68
	} else if (ch == '\r') {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    69
		input.get(ch);
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    70
		if (ch == '\n') {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    71
			lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    72
			return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    73
		} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    74
			throw RelpipeWriterException(L"Crazy carriage stuck during journey");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    75
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    76
	} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    77
		for (currentValue << ch; input.get(ch);) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    78
			switch (ch) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    79
				case ',': return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    80
				case '\r': break;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    81
				case '\n':
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    82
					lastInRecord = true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    83
					return true;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    84
				default: currentValue << ch;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    85
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    86
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    87
	}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    88
	return false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    89
}
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    90
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    91
/**
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    92
 * Data types might be encoded in the attribute names: name::type e.g. some_attribute::integer
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    93
 * 
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    94
 * TODO: share this code through relpipe-lib-infertypes (when available)
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    95
 */
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    96
void tryParseTypes(vector<AttributeMetadata>& metadata, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    97
	std::wregex pattern(L"(.*)::(.*)");
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    98
	std::wsmatch match;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
    99
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   100
	if (configuration.readTypes == Configuration::ReadTypes::AUTO || configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   101
		bool hasTypes = true;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   102
		std::vector<TypeId> types;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   103
		std::vector<string_t> names;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   104
		for (AttributeMetadata& am : metadata) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   105
			if (std::regex_match(am.attributeName, match, pattern)) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   106
				names.push_back(match[1]);
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   107
				if (configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   108
					types.push_back(writer->toTypeId(match[2])); // must be valid type name otherwise exception is thrown
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   109
				} else {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   110
					try {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   111
						types.push_back(writer->toTypeId(match[2]));
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   112
					} catch (...) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   113
						hasTypes = false; // ignore exception and keep original names and default type (string)
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   114
					}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   115
				}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   116
			} else {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   117
				hasTypes = false;
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   118
			}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   119
		}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   120
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   121
		if (hasTypes) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   122
			for (int i = 0, count = metadata.size(); i < count; i++) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   123
				metadata[i].attributeName = names[i];
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   124
				metadata[i].typeId = types[i];
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   125
			}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   126
		} else if (configuration.readTypes == Configuration::ReadTypes::TRUE) {
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   127
			throw RelpipeWriterException(L"Types were expected in the CSV header, but not found.");
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   128
		}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   129
	}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   130
}
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   131
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   132
void CSVCommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   133
	wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   134
	vector<AttributeMetadata> metadata;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   135
	bool headerDone = false;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   136
	bool lastInRecord = false;
21
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   137
	integer_t valueCount = 0;
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   138
	stringstream currentValue;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   139
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   140
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   141
	while (readValue(input, currentValue, lastInRecord) && input.good()) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   142
		if (headerDone) {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   143
			writer->writeAttribute(convertor.from_bytes(currentValue.str()));
22
3f6488171e34 check number of values on each row
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   144
21
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   145
			valueCount++;
22
3f6488171e34 check number of values on each row
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   146
			if (valueCount > metadata.size()) throw RelpipeWriterException(L"The number of values " + std::to_wstring(valueCount) + L" exceeds declared column count: " + std::to_wstring(metadata.size()));
3f6488171e34 check number of values on each row
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   147
			if (lastInRecord && valueCount != metadata.size()) throw RelpipeWriterException(L"The number of values " + std::to_wstring(valueCount) + L" is lower than declared column count: " + std::to_wstring(metadata.size()));
3f6488171e34 check number of values on each row
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   148
			if (lastInRecord) valueCount = 0;
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   149
		} else {
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   150
			AttributeMetadata am;
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   151
			am.attributeName = convertor.from_bytes(currentValue.str());
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   152
			am.typeId = TypeId::STRING;
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   153
			metadata.push_back(am);
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   154
			if (lastInRecord) {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   155
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   156
				vector<string_t> firstLine;
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   157
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   158
				if (metadata.size() == configuration.attributes.size()) {
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   159
					for (int i = 0; i < metadata.size(); i++) {
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   160
						firstLine.push_back(metadata[i].attributeName);
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   161
						metadata[i].attributeName = configuration.attributes[i].name;
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   162
						metadata[i].typeId = configuration.attributes[i].type;
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   163
					}
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   164
				} else if (configuration.attributes.size() == 0) {
20
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   165
					// first line contains attribute names and maybe also types
90ae67de2f68 optionally read data types from the CSV header: --read-types (complements relpipe-out-csv --write-types true)
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   166
					tryParseTypes(metadata, writer, configuration);
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   167
				} else {
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   168
					throw RelpipeWriterException(L"Declared attribute count (" + std::to_wstring(configuration.attributes.size()) + L") does not match with number of columns of the first line (" + std::to_wstring(metadata.size()) + L")");
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   169
				}
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   170
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   171
				headerDone = true;
16
15ee963675af change CLI interface: options: --relation --attribute
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   172
				writer->startRelation(configuration.relation, metadata, true);
3
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   173
				if (firstLine.size()) {
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   174
					for (string_t value : firstLine) writer->writeAttribute(value);
d7907be4cc40 allow also custom relation name, attribute names and types (optional)
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   175
				}
1
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   176
			}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   177
		}
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   178
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   179
		currentValue.str("");
5eb4d149c6e2 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   180
		currentValue.clear();
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   181
	}
21
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   182
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   183
	/**
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   184
	 * RFC 4180:
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   185
	 *  - Each line should contain the same number of fields throughout the file.
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   186
	 *  - The last field in the record must not be followed by a comma.
22eb4838e8d0 check total number of values and throw exception if it does not match the number of columns
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   187
	 */
22
3f6488171e34 check number of values on each row
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   188
	if (valueCount && valueCount != metadata.size()) throw RelpipeWriterException(L"Unexpected EOF: The number of values " + std::to_wstring(valueCount) + L" is lower than declared column count: " + std::to_wstring(metadata.size()));
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   189
}
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   190
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   191
}
0
eca0b23802e8 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   192
}
14
012d491e219a separate code into CSVCommand.h and CSVCommand.h
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   193
}