author | František Kučera <franta-hg@frantovo.cz> |
Sat, 27 Aug 2022 21:56:52 +0200 | |
branch | v_0 |
changeset 21 | af4cb72127c1 |
parent 17 | ea36eed9683f |
permissions | -rw-r--r-- |
0 | 1 |
/** |
2 |
* Relational pipes |
|
3 |
* Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 |
* |
|
5 |
* This program is free software: you can redistribute it and/or modify |
|
6 |
* it under the terms of the GNU General Public License as published by |
|
10
4bcf3fb7cc48
fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents:
6
diff
changeset
|
7 |
* the Free Software Foundation, version 3 of the License. |
0 | 8 |
* |
9 |
* This program is distributed in the hope that it will be useful, |
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
* GNU General Public License for more details. |
|
13 |
* |
|
14 |
* You should have received a copy of the GNU General Public License |
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 |
*/ |
|
17 |
#pragma once |
|
18 |
||
19 |
#include <memory> |
|
20 |
#include <string> |
|
21 |
#include <vector> |
|
22 |
#include <iostream> |
|
23 |
#include <sstream> |
|
24 |
#include <locale> |
|
25 |
#include <codecvt> |
|
26 |
||
27 |
#include <relpipe/reader/typedefs.h> |
|
28 |
#include <relpipe/reader/TypeId.h> |
|
29 |
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h> |
|
30 |
#include <relpipe/reader/handlers/AttributeMetadata.h> |
|
31 |
||
14
a7596589a5b0
change CLI interface: options: --write-header
František Kučera <franta-hg@frantovo.cz>
parents:
10
diff
changeset
|
32 |
#include "Configuration.h" |
1
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
33 |
#include "RelpipeCSVWriterException.h" |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
34 |
|
0 | 35 |
namespace relpipe { |
36 |
namespace out { |
|
37 |
namespace csv { |
|
38 |
||
39 |
using namespace relpipe; |
|
40 |
using namespace relpipe::reader; |
|
41 |
using namespace relpipe::reader::handlers; |
|
42 |
||
6
de4c706edf41
fix typo: Hadler → Handler
František Kučera <franta-hg@frantovo.cz>
parents:
3
diff
changeset
|
43 |
class CSVHandler : public RelationalReaderStringHandler { |
0 | 44 |
private: |
45 |
std::ostream& output; |
|
14
a7596589a5b0
change CLI interface: options: --write-header
František Kučera <franta-hg@frantovo.cz>
parents:
10
diff
changeset
|
46 |
Configuration& configuration; |
1
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
47 |
const char QUOTE = '"'; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
48 |
std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // TODO: local system encoding or generate CSV always in UTF-8 like XML? |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
49 |
std::vector<AttributeMetadata> firstAttributes; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
50 |
integer_t valueCount = 0; |
21
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
51 |
|
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
52 |
/** |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
53 |
* @param a |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
54 |
* @param b |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
55 |
* @return true if relations have same number and types of attributes (names may differ) |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
56 |
*/ |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
57 |
bool matches(const std::vector<AttributeMetadata>& a, const std::vector<AttributeMetadata>& b) { |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
58 |
if (a.size() != b.size()) return false; |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
59 |
for (int i = 0, limit = a.size(); i < limit; i++) if (a[i].getTypeId() != b[i].getTypeId()) return false; |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
60 |
return true; |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
61 |
} |
0 | 62 |
public: |
63 |
||
14
a7596589a5b0
change CLI interface: options: --write-header
František Kučera <franta-hg@frantovo.cz>
parents:
10
diff
changeset
|
64 |
CSVHandler(std::ostream& output, Configuration& configuration) : output(output), configuration(configuration) { |
0 | 65 |
} |
66 |
||
67 |
void startRelation(string_t name, std::vector<AttributeMetadata> attributes) override { |
|
1
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
68 |
if (firstAttributes.empty()) { |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
69 |
firstAttributes = attributes; |
17
ea36eed9683f
optionally write data types into the CSV header: --write-types
František Kučera <franta-hg@frantovo.cz>
parents:
14
diff
changeset
|
70 |
if (configuration.writeHeader) for (auto attr : attributes) attribute(configuration.writeTypes ? attr.getAttributeName() + L"::" + attr.getTypeName() : attr.getAttributeName()); |
21
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
71 |
} else if (matches(firstAttributes, attributes)) { |
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
72 |
// do UNION ALL – just append the records |
1
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
73 |
} else { |
21
af4cb72127c1
do UNION ALL if multiple relations have same number of attributes of same types
František Kučera <franta-hg@frantovo.cz>
parents:
17
diff
changeset
|
74 |
throw RelpipeCSVWriterException(L"To the CSV format we can convert only one relation or multiple relations that have same number of attributes of same types (relation and attribute names may differ – result is named after the first one)."); |
1
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
75 |
} |
0 | 76 |
} |
77 |
||
78 |
void attribute(const string_t& value) override { |
|
1
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
79 |
valueCount++; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
80 |
|
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
81 |
if (value.size() > 0) { |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
82 |
output << QUOTE; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
83 |
for (auto ch : convertor.to_bytes(value)) { |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
84 |
if (ch == QUOTE) output << QUOTE << QUOTE; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
85 |
else output << ch; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
86 |
} |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
87 |
output << QUOTE; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
88 |
} |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
89 |
|
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
90 |
if (valueCount % firstAttributes.size()) { |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
91 |
output << ","; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
92 |
} else { |
3
b5a5bf32f7ff
generate CRLF line ends according to RFC 4180
František Kučera <franta-hg@frantovo.cz>
parents:
1
diff
changeset
|
93 |
output << "\r\n"; |
1
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
94 |
valueCount = 0; |
82f86dc48339
first working version
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
95 |
} |
0 | 96 |
} |
97 |
||
98 |
void endOfPipe() { |
|
99 |
output.flush(); |
|
100 |
} |
|
101 |
||
102 |
}; |
|
103 |
||
104 |
} |
|
105 |
} |
|
106 |
} |