src/CSVHandler.h
author František Kučera <franta-hg@frantovo.cz>
Sat, 27 Aug 2022 21:56:52 +0200
branchv_0
changeset 21 af4cb72127c1
parent 17 ea36eed9683f
permissions -rw-r--r--
do UNION ALL if multiple relations have same number of attributes of same types
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
10
4bcf3fb7cc48 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#pragma once
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <memory>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
#include <string>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <vector>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <iostream>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <sstream>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
#include <locale>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <codecvt>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
#include <relpipe/reader/typedefs.h>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <relpipe/reader/TypeId.h>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
#include <relpipe/reader/handlers/AttributeMetadata.h>
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
14
a7596589a5b0 change CLI interface: options: --write-header
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    32
#include "Configuration.h"
1
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    33
#include "RelpipeCSVWriterException.h"
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    34
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
namespace relpipe {
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
namespace out {
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
namespace csv {
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
using namespace relpipe;
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    40
using namespace relpipe::reader;
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    41
using namespace relpipe::reader::handlers;
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    42
6
de4c706edf41 fix typo: Hadler → Handler
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    43
class CSVHandler : public RelationalReaderStringHandler {
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    44
private:
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    45
	std::ostream& output;
14
a7596589a5b0 change CLI interface: options: --write-header
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    46
	Configuration& configuration;
1
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    47
	const char QUOTE = '"';
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    48
	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // TODO: local system encoding or generate CSV always in UTF-8 like XML?
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    49
	std::vector<AttributeMetadata> firstAttributes;
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    50
	integer_t valueCount = 0;
21
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    51
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    52
	/**
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    53
	 * @param a
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    54
	 * @param b
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    55
	 * @return true if relations have same number and types of attributes (names may differ)
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    56
	 */
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    57
	bool matches(const std::vector<AttributeMetadata>& a, const std::vector<AttributeMetadata>& b) {
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    58
		if (a.size() != b.size()) return false;
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    59
		for (int i = 0, limit = a.size(); i < limit; i++) if (a[i].getTypeId() != b[i].getTypeId()) return false;
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    60
		return true;
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    61
	}
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    62
public:
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    63
14
a7596589a5b0 change CLI interface: options: --write-header
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    64
	CSVHandler(std::ostream& output, Configuration& configuration) : output(output), configuration(configuration) {
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    65
	}
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    66
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    67
	void startRelation(string_t name, std::vector<AttributeMetadata> attributes) override {
1
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    68
		if (firstAttributes.empty()) {
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    69
			firstAttributes = attributes;
17
ea36eed9683f optionally write data types into the CSV header: --write-types
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    70
			if (configuration.writeHeader) for (auto attr : attributes) attribute(configuration.writeTypes ? attr.getAttributeName() + L"::" + attr.getTypeName() : attr.getAttributeName());
21
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    71
		} else if (matches(firstAttributes, attributes)) {
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    72
			// do UNION ALL – just append the records
1
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    73
		} else {
21
af4cb72127c1 do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
    74
			throw RelpipeCSVWriterException(L"To the CSV format we can convert only one relation or multiple relations that have same number of attributes of same types (relation and attribute names may differ – result is named after the first one).");
1
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    75
		}
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    76
	}
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    77
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    78
	void attribute(const string_t& value) override {
1
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    79
		valueCount++;
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    80
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    81
		if (value.size() > 0) {
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    82
			output << QUOTE;
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    83
			for (auto ch : convertor.to_bytes(value)) {
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    84
				if (ch == QUOTE) output << QUOTE << QUOTE;
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    85
				else output << ch;
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    86
			}
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    87
			output << QUOTE;
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    88
		}
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    89
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    90
		if (valueCount % firstAttributes.size()) {
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    91
			output << ",";
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    92
		} else {
3
b5a5bf32f7ff generate CRLF line ends according to RFC 4180
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    93
			output << "\r\n";
1
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    94
			valueCount = 0;
82f86dc48339 first working version
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    95
		}
0
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    96
	}
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    97
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    98
	void endOfPipe() {
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    99
		output.flush();
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   100
	}
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   101
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   102
};
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   103
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   104
}
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   105
}
97967db4b95b project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   106
}