1 /** |
|
2 * Relational pipes |
|
3 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 * |
|
5 * This program is free software: you can redistribute it and/or modify |
|
6 * it under the terms of the GNU General Public License as published by |
|
7 * the Free Software Foundation, version 3 of the License. |
|
8 * |
|
9 * This program is distributed in the hope that it will be useful, |
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 * GNU General Public License for more details. |
|
13 * |
|
14 * You should have received a copy of the GNU General Public License |
|
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 */ |
|
17 #pragma once |
|
18 |
|
19 #include <memory> |
|
20 #include <string> |
|
21 #include <vector> |
|
22 #include <map> |
|
23 #include <iostream> |
|
24 #include <sstream> |
|
25 #include <locale> |
|
26 #include <codecvt> |
|
27 #include <regex> |
|
28 #include <assert.h> |
|
29 |
|
30 #include <libguile.h> |
|
31 |
|
32 #include <relpipe/reader/typedefs.h> |
|
33 #include <relpipe/reader/TypeId.h> |
|
34 #include <relpipe/reader/handlers/RelationalReaderValueHandler.h> |
|
35 #include <relpipe/reader/handlers/AttributeMetadata.h> |
|
36 |
|
37 #include <relpipe/writer/Factory.h> |
|
38 |
|
39 #include <relpipe/cli/RelpipeCLIException.h> |
|
40 |
|
41 #include "Configuration.h" |
|
42 #include "GuileException.h" |
|
43 |
|
44 namespace relpipe { |
|
45 namespace tr { |
|
46 namespace guile { |
|
47 |
|
48 using namespace std; |
|
49 using namespace relpipe; |
|
50 using namespace relpipe::reader; |
|
51 using namespace relpipe::reader::handlers; |
|
52 |
|
53 class GuileHandler : public RelationalReaderValueHandler { |
|
54 private: |
|
55 std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or use always UTF-8 between C++ and Guile |
|
56 |
|
57 Configuration configuration; |
|
58 writer::RelationalWriter* relationalWriter; |
|
59 |
|
60 RelationConfiguration* currentRelationConfiguration = nullptr; |
|
61 vector<AttributeMetadata> currentReaderMetadata; |
|
62 vector<writer::AttributeMetadata> currentWriterMetadata; |
|
63 std::map<string_t, string_t> currenVariablesMapping; |
|
64 integer_t currentAttributeIndex = 0; |
|
65 boolean_t includeCurrentRecord = false; |
|
66 |
|
67 void add(vector<AttributeMetadata>& readerAttributes, vector<writer::AttributeMetadata>& writerAttributes) { |
|
68 for (AttributeMetadata readerAttributes : readerAttributes) |
|
69 writerAttributes.push_back({ |
|
70 readerAttributes.getAttributeName(), |
|
71 relationalWriter->toTypeId(readerAttributes.getTypeName()) |
|
72 }); |
|
73 } |
|
74 |
|
75 void generateVariableMappings() { |
|
76 currenVariablesMapping.clear(); |
|
77 for (AttributeMetadata m : currentReaderMetadata) currenVariablesMapping[m.getAttributeName()] = L""; |
|
78 for (writer::AttributeMetadata m : currentWriterMetadata) currenVariablesMapping[m.attributeName] = L""; |
|
79 |
|
80 for (std::pair<string_t, string_t> m : currenVariablesMapping) { |
|
81 currenVariablesMapping[m.first] = escapeAwkVariableName(m.first); |
|
82 } |
|
83 } |
|
84 |
|
85 /** |
|
86 * @param attributeName name from relational pipe |
|
87 * @return variable name in Guile |
|
88 */ |
|
89 string_t a2v(const string_t& attributeName) { |
|
90 if (currenVariablesMapping.find(attributeName) != currenVariablesMapping.end()) return currenVariablesMapping[attributeName]; |
|
91 else throw GuileException(L"Unable to find value in currenVariablesMapping"); |
|
92 } |
|
93 |
|
94 template <typename K, typename V> bool containsValue(std::map<K, V> map, V value) { // TODO: common function (Guile, AWK) |
|
95 for (std::pair<K, V> p : map) if (p.second == value) return true; |
|
96 return false; |
|
97 } |
|
98 |
|
99 string_t escapeAwkVariableName(const string_t& attributeName, bool addPrefix = true) { |
|
100 std::wregex badCharacters(L"\\s"); |
|
101 string_t name = std::regex_replace(attributeName, badCharacters, L"-"); |
|
102 |
|
103 if (addPrefix) name = L"$" + name; // $ = standard attribute-variable prefix |
|
104 |
|
105 if (containsValue(currenVariablesMapping, name)) return escapeAwkVariableName(L"$" + name, false); // $ = different prefix added to distinguish two attributes with ambiguous names |
|
106 else return name; |
|
107 |
|
108 } |
|
109 |
|
110 void debugVariableMapping(const string_t& relationName) { |
|
111 relationalWriter->startRelation(relationName + L".variableMapping",{ |
|
112 {L"attribute", writer::TypeId::STRING}, |
|
113 {L"variable", writer::TypeId::STRING}, |
|
114 }, true); |
|
115 |
|
116 for (std::pair<string_t, string_t> m : currenVariablesMapping) { |
|
117 relationalWriter->writeAttribute(m.first); |
|
118 relationalWriter->writeAttribute(m.second); |
|
119 } |
|
120 } |
|
121 |
|
122 SCM toGuileSymbol(const string_t& name) { |
|
123 return scm_string_to_symbol(scm_from_locale_string(convertor.to_bytes(name).c_str())); |
|
124 } |
|
125 |
|
126 /** |
|
127 * @param code guile source code e.g. (+ 1 2 3) or #t |
|
128 * @param defaultReturnValue is returned if code is empty |
|
129 * @return result of code execution or defaultReturnValue |
|
130 */ |
|
131 SCM evalGuileCode(const string_t& code, SCM defaultReturnValue = SCM_BOOL_F) { |
|
132 if (code.size()) return scm_eval_string(toGuileValue(&code, typeid (string_t), TypeId::STRING)); |
|
133 else return defaultReturnValue; |
|
134 } |
|
135 |
|
136 SCM toGuileValue(const void* value, const std::type_info& typeInfo, TypeId type) { |
|
137 switch (type) { |
|
138 case TypeId::BOOLEAN: |
|
139 { |
|
140 assert(typeInfo == typeid (boolean_t)); |
|
141 auto* typedValue = static_cast<const boolean_t*> (value); |
|
142 return *typedValue ? SCM_BOOL_T : SCM_BOOL_F; |
|
143 } |
|
144 case TypeId::INTEGER: |
|
145 { |
|
146 assert(typeInfo == typeid (integer_t)); |
|
147 auto* typedValue = static_cast<const integer_t*> (value); |
|
148 return scm_from_int64(*typedValue); |
|
149 } |
|
150 case TypeId::STRING: |
|
151 { |
|
152 assert(typeInfo == typeid (string_t)); |
|
153 auto* typedValue = static_cast<const string_t*> (value); |
|
154 return scm_from_locale_string(convertor.to_bytes(*typedValue).c_str()); |
|
155 } |
|
156 default: |
|
157 throw cli::RelpipeCLIException(L"Unsupported type in toGuileValue()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); |
|
158 } |
|
159 } |
|
160 |
|
161 void defineGuileVariable(const string_t& name, const void* value, const std::type_info& typeInfo, TypeId type) { |
|
162 scm_define(toGuileSymbol(name), toGuileValue(value, typeInfo, type)); |
|
163 } |
|
164 |
|
165 /** |
|
166 * TODO: use a common method |
|
167 */ |
|
168 bool parseBoolean(const string_t& value) { |
|
169 if (value == L"true") return true; |
|
170 else if (value == L"false") return false; |
|
171 else throw relpipe::cli::RelpipeCLIException(L"Unable to parse boolean value: " + value + L" (expecting true or false)", relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS); |
|
172 } |
|
173 |
|
174 void defineGuileVariable(const DefinitionRecipe& definition) { |
|
175 switch (relationalWriter->toTypeId(definition.type)) { |
|
176 case writer::TypeId::BOOLEAN: |
|
177 { |
|
178 boolean_t value = parseBoolean(definition.value); |
|
179 defineGuileVariable(definition.name, &value, typeid (value), TypeId::BOOLEAN); |
|
180 break; |
|
181 } |
|
182 case writer::TypeId::INTEGER: |
|
183 { |
|
184 integer_t value = stol(definition.value); |
|
185 defineGuileVariable(definition.name, &value, typeid (value), TypeId::INTEGER); |
|
186 break; |
|
187 } |
|
188 case writer::TypeId::STRING: |
|
189 { |
|
190 defineGuileVariable(definition.name, &definition.value, typeid (definition.value), TypeId::STRING); |
|
191 break; |
|
192 } |
|
193 default: |
|
194 throw cli::RelpipeCLIException(L"Unsupported type in defineGuileVariable(): " + definition.type, cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); |
|
195 } |
|
196 } |
|
197 |
|
198 void undefineGuileVariable(const string_t& name) { |
|
199 scm_define(toGuileSymbol(name), scm_make_undefined_variable()); // undefined != (define n) |
|
200 // TODO: or use: scm_variable_unset_x() ? |
|
201 } |
|
202 |
|
203 void writeGuileValueToAttribute(const writer::AttributeMetadata& attribute) { |
|
204 string_t variableName = a2v(attribute.attributeName); |
|
205 SCM guileValue = scm_eval_string(toGuileValue(&variableName, typeid (variableName), TypeId::STRING)); |
|
206 |
|
207 switch (attribute.typeId) { |
|
208 case writer::TypeId::BOOLEAN: |
|
209 { |
|
210 boolean_t value = scm_to_bool(guileValue); |
|
211 return relationalWriter->writeAttribute(&value, typeid (value)); |
|
212 } |
|
213 case writer::TypeId::INTEGER: |
|
214 { |
|
215 integer_t value = scm_to_int64(guileValue); |
|
216 return relationalWriter->writeAttribute(&value, typeid (value)); |
|
217 } |
|
218 case writer::TypeId::STRING: |
|
219 { |
|
220 char* ch = scm_to_locale_string(guileValue); |
|
221 string_t value = convertor.from_bytes(ch); |
|
222 free(ch); |
|
223 return relationalWriter->writeAttribute(&value, typeid (value)); |
|
224 } |
|
225 default: |
|
226 throw cli::RelpipeCLIException(L"Unsupported type in writeGuileValueToAttribute()", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); |
|
227 } |
|
228 } |
|
229 |
|
230 /** |
|
231 * Read from the Guile variables and write to relational output stream. |
|
232 */ |
|
233 void writeCurrentRecord() { |
|
234 for (auto attribute : currentWriterMetadata) writeGuileValueToAttribute(attribute); |
|
235 } |
|
236 |
|
237 void writeMoreRecords() { |
|
238 while (scm_to_bool(evalGuileCode(currentRelationConfiguration->guileHasMoreRecords, SCM_BOOL_F))) writeCurrentRecord(); |
|
239 } |
|
240 |
|
241 public: |
|
242 |
|
243 GuileHandler(writer::RelationalWriter* relationalWriter, Configuration& configuration) : relationalWriter(relationalWriter), configuration(configuration) { |
|
244 } |
|
245 |
|
246 void startRelation(string_t name, vector<AttributeMetadata> attributes) override { |
|
247 if (currentRelationConfiguration) { |
|
248 evalGuileCode(currentRelationConfiguration->guileAfterRecords); |
|
249 writeMoreRecords(); |
|
250 for (DefinitionRecipe definition : currentRelationConfiguration->definitions) undefineGuileVariable(definition.name); |
|
251 } |
|
252 for (auto attribute : currentReaderMetadata) undefineGuileVariable(attribute.getAttributeName()); |
|
253 |
|
254 for (DefinitionRecipe definition : configuration.definitions) defineGuileVariable(definition); |
|
255 |
|
256 currentRelationConfiguration = nullptr; |
|
257 for (int i = 0; i < configuration.relationConfigurations.size(); i++) { |
|
258 if (regex_match(name, wregex(configuration.relationConfigurations[i].relation))) { |
|
259 currentRelationConfiguration = &configuration.relationConfigurations[i]; |
|
260 for (DefinitionRecipe definition : currentRelationConfiguration->definitions) defineGuileVariable(definition); |
|
261 break; // it there are multiple matches, only the first configuration is used |
|
262 } |
|
263 } |
|
264 |
|
265 currentReaderMetadata = attributes; |
|
266 // TODO: move to a reusable method (or use same metadata on both reader and writer side?) |
|
267 currentWriterMetadata.clear(); |
|
268 if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) { |
|
269 if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata); |
|
270 currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end()); |
|
271 if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata); |
|
272 } else { |
|
273 add(currentReaderMetadata, currentWriterMetadata); |
|
274 } |
|
275 |
|
276 generateVariableMappings(); |
|
277 |
|
278 if (currentRelationConfiguration && currentRelationConfiguration->debugVariableMapping) debugVariableMapping(name); |
|
279 |
|
280 if (!currentRelationConfiguration || !currentRelationConfiguration->drop) relationalWriter->startRelation(name, currentWriterMetadata, true); |
|
281 |
|
282 if (currentRelationConfiguration) { |
|
283 // TODO: better variable name, object, function? |
|
284 defineGuileVariable(L"relpipe-relation-name", &name, typeid (name), TypeId::STRING); |
|
285 evalGuileCode(currentRelationConfiguration->guileBeforeRecords); |
|
286 } |
|
287 } |
|
288 |
|
289 void attribute(const void* value, const std::type_info& type) override { |
|
290 if (currentRelationConfiguration) { |
|
291 defineGuileVariable(a2v(currentReaderMetadata[currentAttributeIndex].getAttributeName()), value, type, currentReaderMetadata[currentAttributeIndex].getTypeId()); |
|
292 |
|
293 currentAttributeIndex++; |
|
294 |
|
295 // TODO: > 0 ?: |
|
296 if (currentAttributeIndex > 0 && currentAttributeIndex % currentReaderMetadata.size() == 0) { |
|
297 evalGuileCode(currentRelationConfiguration->guileForEach); |
|
298 includeCurrentRecord = scm_to_bool(evalGuileCode(currentRelationConfiguration->guileWhere, SCM_BOOL_T)); |
|
299 if (includeCurrentRecord && !currentRelationConfiguration->drop) writeCurrentRecord(); |
|
300 includeCurrentRecord = false; |
|
301 writeMoreRecords(); |
|
302 } |
|
303 |
|
304 currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size(); |
|
305 } else { |
|
306 relationalWriter->writeAttribute(value, type); |
|
307 } |
|
308 } |
|
309 |
|
310 void endOfPipe() { |
|
311 if (currentRelationConfiguration) { |
|
312 evalGuileCode(currentRelationConfiguration->guileAfterRecords); |
|
313 writeMoreRecords(); |
|
314 } |
|
315 } |
|
316 |
|
317 }; |
|
318 |
|
319 } |
|
320 } |
|
321 } |
|