author | František Kučera <franta-hg@frantovo.cz> |
Sat, 18 Feb 2023 22:57:22 +0100 | |
branch | v_0 |
changeset 16 | 1731e8dff446 |
parent 15 | e5421eea0583 |
child 17 | f67047a1e19e |
permissions | -rw-r--r-- |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
/** |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
* Relational pipes |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
4 |
* |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
5 |
* This program is free software: you can redistribute it and/or modify |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
* it under the terms of the GNU General Public License as published by |
7
d59f31fa7009
fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents:
3
diff
changeset
|
7 |
* the Free Software Foundation, version 3 of the License. |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
8 |
* |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
9 |
* This program is distributed in the hope that it will be useful, |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
12 |
* GNU General Public License for more details. |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
13 |
* |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
14 |
* You should have received a copy of the GNU General Public License |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
*/ |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
17 |
#pragma once |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
18 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
19 |
#include <string> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
20 |
#include <vector> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
21 |
#include <iostream> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
22 |
#include <sstream> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
23 |
#include <locale> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
24 |
#include <codecvt> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
25 |
#include <regex> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
26 |
#include <cassert> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
27 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
28 |
#include <relpipe/reader/typedefs.h> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
29 |
#include <relpipe/reader/TypeId.h> |
1 | 30 |
#include <relpipe/reader/RelpipeReaderException.h> |
31 |
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h> |
|
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
32 |
#include <relpipe/reader/handlers/AttributeMetadata.h> |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
33 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
34 |
namespace relpipe { |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
35 |
namespace out { |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
36 |
namespace recfile { |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
37 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
38 |
using namespace relpipe::reader; |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
39 |
|
2
db029ce62ec5
fix typo: Hadler → Handler
František Kučera <franta-hg@frantovo.cz>
parents:
1
diff
changeset
|
40 |
class RecfileHandler : public handlers::RelationalReaderStringHandler { |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
41 |
private: |
1 | 42 |
std::ostream& output; |
43 |
wstring_convert<codecvt_utf8<wchar_t>> convertor; // XML output will be always in UTF-8 |
|
44 |
std::vector<TypeId> attributeTypes; |
|
45 |
std::vector<string_t> attributeTypeCodes; |
|
46 |
std::vector<string_t> attributeNames; |
|
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
47 |
integer_t valueCount = 0; |
1 | 48 |
integer_t attributeCount = 0; |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
49 |
integer_t relationCount = 0; |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
50 |
|
1 | 51 |
void writeRelationName(const string_t& name) { |
52 |
// FIXME: escaping/filtering |
|
53 |
output << "%rec: " << convertor.to_bytes(name) << std::endl; |
|
54 |
} |
|
55 |
||
56 |
const std::string toRecfileType(const TypeId& type) { |
|
57 |
switch (type) { |
|
58 |
case TypeId::BOOLEAN: return "bool"; |
|
59 |
case TypeId::INTEGER: return "int"; |
|
3
b0fe316e1201
string is the default type in recfile, so there is no need for %type metadata for string attributes
František Kučera <franta-hg@frantovo.cz>
parents:
2
diff
changeset
|
60 |
case TypeId::STRING: return ""; |
1 | 61 |
default: throw RelpipeReaderException(L"Unsupported type – unable to convert to a Recfile type"); |
62 |
} |
|
63 |
} |
|
64 |
||
65 |
void writeAttributeMetadata(const handlers::AttributeMetadata& attribute) { |
|
66 |
// FIXME: escaping/filtering |
|
3
b0fe316e1201
string is the default type in recfile, so there is no need for %type metadata for string attributes
František Kučera <franta-hg@frantovo.cz>
parents:
2
diff
changeset
|
67 |
std::string recfileType = toRecfileType(attribute.getTypeId()); |
b0fe316e1201
string is the default type in recfile, so there is no need for %type metadata for string attributes
František Kučera <franta-hg@frantovo.cz>
parents:
2
diff
changeset
|
68 |
if (recfileType.size()) output << "%type: " << convertor.to_bytes(attribute.getAttributeName()) << " " << recfileType << std::endl; |
1 | 69 |
} |
70 |
||
71 |
void writeSeparator() { |
|
72 |
output << std::endl; |
|
73 |
} |
|
74 |
||
16
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
75 |
bool between(wchar_t ch, wchar_t start, wchar_t end) { |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
76 |
return ch >= start && ch <= end; |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
77 |
} |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
78 |
|
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
79 |
bool isValidNameCharacter(wchar_t ch, bool first) { |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
80 |
if (first) { |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
81 |
// also '%' is technically valid here, but it is used for special |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
82 |
// purposes like the relation name or attribute types |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
83 |
return /**/between(ch, L'a', L'z') |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
84 |
|| between(ch, L'A', L'Z'); |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
85 |
} else { |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
86 |
return ch == L'_' |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
87 |
|| between(ch, L'a', L'z') |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
88 |
|| between(ch, L'A', L'Z') |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
89 |
|| between(ch, L'0', L'9'); |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
90 |
} |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
91 |
} |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
92 |
|
1 | 93 |
void writeAttribute(const string_t& name, const TypeId& type, const string_t& value) { |
16
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
94 |
// TODO: multiple escapting mode - including one that is not lossless |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
95 |
// but allows writing a single '_' inside the name |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
96 |
for (size_t i = 0, limit = name.size(); i < limit; i++) { |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
97 |
wchar_t ch = name[i]; |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
98 |
bool valid = isValidNameCharacter(ch, i == 0); |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
99 |
|
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
100 |
// Not a lossless round-trip |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
101 |
// (maybe we could sacrifice some reserved prefix): |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
102 |
if (i == 0 && !valid) output << 'x'; |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
103 |
|
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
104 |
if (ch == '_') output << "__"; |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
105 |
else if (valid) output << convertor.to_bytes(ch); |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
106 |
else output << '_' << ((uint32_t) ch) << '_'; |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
107 |
} |
1731e8dff446
attribute name escaping: first version
František Kučera <franta-hg@frantovo.cz>
parents:
15
diff
changeset
|
108 |
output << ": "; |
1 | 109 |
|
110 |
for (char ch : convertor.to_bytes(value)) { |
|
111 |
output << ch; |
|
112 |
if (ch == '\n') output << "+ "; |
|
113 |
} |
|
114 |
||
115 |
output << std::endl; |
|
116 |
} |
|
117 |
||
15
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
118 |
void writeRecordCount() { |
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
119 |
if (attributeCount) { |
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
120 |
output << std::endl << "# Record count: " << (valueCount / attributeCount) << std::endl; |
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
121 |
} |
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
122 |
} |
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
123 |
|
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
124 |
public: |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
125 |
|
1 | 126 |
RecfileHandler(std::ostream& output) : output(output) { |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
127 |
} |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
128 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
129 |
void startRelation(string_t name, std::vector<handlers::AttributeMetadata> attributes) override { |
15
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
130 |
writeRecordCount(); |
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
131 |
|
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
132 |
valueCount = 0; |
1 | 133 |
attributeCount = 0; |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
134 |
|
1 | 135 |
if (relationCount) writeSeparator(); |
136 |
||
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
137 |
relationCount++; |
1 | 138 |
writeRelationName(name); |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
139 |
|
1 | 140 |
attributeCount = attributes.size(); |
141 |
attributeTypes.resize(attributeCount); |
|
142 |
attributeTypeCodes.resize(attributeCount); |
|
143 |
attributeNames.resize(attributeCount); |
|
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
144 |
for (int i = 0; i < attributes.size(); i++) { |
1 | 145 |
attributeNames[i] = attributes[i].getAttributeName(); |
146 |
attributeTypes[i] = attributes[i].getTypeId(); |
|
147 |
attributeTypeCodes[i] = attributes[i].getTypeName(); |
|
148 |
writeAttributeMetadata(attributes[i]); |
|
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
149 |
} |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
150 |
} |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
151 |
|
1 | 152 |
void attribute(const string_t& value) override { |
153 |
integer_t i = valueCount % attributeCount; |
|
154 |
if (i == 0) writeSeparator(); |
|
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
155 |
valueCount++; |
1 | 156 |
writeAttribute(attributeNames[i], attributeTypes[i], value); |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
157 |
} |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
158 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
159 |
void endOfPipe() { |
15
e5421eea0583
write record count (as a comment at the end of the relation)
František Kučera <franta-hg@frantovo.cz>
parents:
7
diff
changeset
|
160 |
writeRecordCount(); |
1 | 161 |
if (valueCount) writeSeparator(); |
0
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
162 |
} |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
163 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
164 |
}; |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
165 |
|
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
166 |
} |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
167 |
} |
9005fdd81bca
project and code skeleton: text output
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
168 |
} |