|
1 /** |
|
2 * Relational pipes |
|
3 * Copyright © 2021 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 * |
|
5 * This program is free software: you can redistribute it and/or modify |
|
6 * it under the terms of the GNU General Public License as published by |
|
7 * the Free Software Foundation, version 3 of the License. |
|
8 * |
|
9 * This program is distributed in the hope that it will be useful, |
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 * GNU General Public License for more details. |
|
13 * |
|
14 * You should have received a copy of the GNU General Public License |
|
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 */ |
|
17 #pragma once |
|
18 |
|
19 #include <memory> |
|
20 #include <vector> |
|
21 #include <array> |
|
22 #include <sstream> |
|
23 #include <regex> |
|
24 |
|
25 #include "ASN1Reader.h" |
|
26 #include "ValidatingASN1ContentHandler.h" |
|
27 #include "uri.h" |
|
28 |
|
29 namespace relpipe { |
|
30 namespace in { |
|
31 namespace asn1 { |
|
32 namespace lib { |
|
33 |
|
34 /** |
|
35 * Reads ASN.1 data encoded as BER (DER, CER). |
|
36 */ |
|
37 class BasicASN1Reader : public ASN1Reader { |
|
38 private: |
|
39 |
|
/** Set once writeStreamStart() has been sent to the handlers (see processNext). */
bool started{false};

/** When true, OCTET STRING and BIT STRING contents are probed for nested BER data (changed through setOption). */
bool parseEncapsulated{true};
|
43 |
|
/**
 * Converts a textual option value to a boolean.
 * TODO: use a common method
 *
 * @param value must be exactly "true" or "false"
 * @return the parsed value
 * @throws std::invalid_argument for any other text
 */
bool parseBoolean(const std::string& value) {
	const bool isTrue = value == "true";
	if (!isTrue && value != "false") {
		throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
	}
	return isTrue;
}
|
52 |
|
53 class BasicHeader : public ASN1ContentHandler::Header { |
|
54 public: |
|
55 bool definiteLength; |
|
56 size_t length; |
|
57 }; |
|
58 |
|
/** Bookkeeping for one collection that has been opened and not yet closed. */
struct LevelMetadata {
	/** copied from the collection's header */
	bool definiteLength;
	/** declared content length in octets (meaningful only for the definite form) */
	size_t length;
	/** value of getBytesRead() right after the collection's header was consumed */
	size_t start;
};

/** Stack of open collections; the innermost one is at the back. */
std::vector<LevelMetadata> level;
|
67 |
|
68 void checkRemainingItems() { |
|
69 if (level.size()) { |
|
70 LevelMetadata& l = level.back(); |
|
71 if (l.definiteLength && l.length == getBytesRead() - l.start) { |
|
72 level.pop_back(); |
|
73 handlers->writeCollectionEnd(); |
|
74 checkRemainingItems(); // multiple collections may end at the same point |
|
75 } |
|
76 } |
|
77 } |
|
78 |
|
79 BasicHeader readHeader() { |
|
80 using TagClass = ASN1ContentHandler::TagClass; |
|
81 using PC = ASN1ContentHandler::PC; |
|
82 |
|
83 BasicHeader h; |
|
84 |
|
85 memset(&h, 0, sizeof (h)); // TODO: remove, not needed |
|
86 |
|
87 uint8_t tagByte; |
|
88 read(&tagByte, 1); |
|
89 |
|
90 h.tagClass = (TagClass) (tagByte >> 6); |
|
91 h.pc = (PC) ((tagByte >> 5) & 1); |
|
92 h.tag = tagByte & (0xFF >> 3); |
|
93 if (h.tag == 31) { // all five tag bits are set → tag number (greater than 30) is encoded in following octets |
|
94 h.tag = 0; |
|
95 uint8_t moreTag = 0; |
|
96 do { |
|
97 read(&moreTag, 1); |
|
98 h.tag = h.tag << 7 | (moreTag & (0xFF >> 1)); |
|
99 } while (moreTag & (1 << 7)); |
|
100 } |
|
101 |
|
102 uint8_t lengthByte; |
|
103 read(&lengthByte, 1); |
|
104 |
|
105 if (lengthByte >> 7 == 0) { |
|
106 // definite short |
|
107 h.definiteLength = true; |
|
108 h.length = lengthByte; |
|
109 } else if (lengthByte == 0b10000000) { |
|
110 // indefinite |
|
111 h.definiteLength = false; |
|
112 h.length = 0; |
|
113 } else if (lengthByte == 0xFF) { |
|
114 throw relpipe::writer::RelpipeWriterException(L"ASN.1 lengthByte == 0xFF (reserved value)"); // TODO: better exception |
|
115 } else { |
|
116 // definite long |
|
117 h.definiteLength = true; |
|
118 h.length = 0; |
|
119 std::vector<uint8_t> lengthBytes(lengthByte & 0b01111111, 0); |
|
120 read(lengthBytes.data(), lengthBytes.size()); |
|
121 for (uint8_t l : lengthBytes) h.length = (h.length << 8) + l; |
|
122 } |
|
123 |
|
124 return h; |
|
125 } |
|
126 |
|
127 const std::string readString(size_t length) { |
|
128 std::string result; |
|
129 |
|
130 for (size_t remaining = length; remaining;) { |
|
131 size_t current = std::min(remaining, (size_t) 3); |
|
132 result.resize(result.size() + current); |
|
133 read((uint8_t*) result.data() + result.size() - current, current); |
|
134 remaining -= current; |
|
135 } |
|
136 |
|
137 return result; |
|
138 } |
|
139 |
|
140 const std::vector<uint8_t> readVector(size_t length) { |
|
141 std::vector<uint8_t> result; |
|
142 std::string s = readString(length); // TODO: read directly to the vector |
|
143 result.resize(length); |
|
144 for (size_t i = 0; i < length; i++) result[i] = (uint8_t) s[i]; |
|
145 return result; |
|
146 } |
|
147 |
|
148 void processNext() { |
|
149 using TagClass = ASN1ContentHandler::TagClass; |
|
150 using PC = ASN1ContentHandler::PC; |
|
151 |
|
152 checkRemainingItems(); |
|
153 BasicHeader typeHeader = readHeader(); |
|
154 // commit(); // TODO: commit here and recover later instead of rollback? |
|
155 |
|
156 if (!started) { |
|
157 handlers->writeStreamStart(); |
|
158 started = true; |
|
159 } |
|
160 |
|
161 // TODO: check tagClass and pc |
|
162 |
|
163 // TODO: constants, more types |
|
164 if (typeHeader.tag == UniversalType::EndOfContent && typeHeader.tagClass == TagClass::Universal && typeHeader.pc == PC::Primitive) { |
|
165 handlers->writeCollectionEnd(); |
|
166 } else if (typeHeader.tag == UniversalType::Sequence) { |
|
167 level.push_back({typeHeader.definiteLength, typeHeader.length, getBytesRead()}); // TODO: transaction |
|
168 handlers->writeCollectionStart(typeHeader); |
|
169 } else if (typeHeader.tag == UniversalType::Set) { |
|
170 level.push_back({typeHeader.definiteLength, typeHeader.length, getBytesRead()}); // TODO: transaction |
|
171 handlers->writeCollectionStart(typeHeader); |
|
172 } else if (typeHeader.pc == PC::Constructed) { |
|
173 level.push_back({typeHeader.definiteLength, typeHeader.length, getBytesRead()}); // TODO: transaction |
|
174 handlers->writeCollectionStart(typeHeader); |
|
175 } else if (typeHeader.tag == UniversalType::Null && typeHeader.length == 0) { |
|
176 handlers->writeNull(typeHeader); |
|
177 } else if (typeHeader.tag == UniversalType::Boolean && typeHeader.definiteLength && typeHeader.length == 1) { |
|
178 bool value; |
|
179 read((uint8_t*) & value, 1); |
|
180 handlers->writeBoolean(typeHeader, value); |
|
181 } else if (typeHeader.tag == UniversalType::Integer && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
182 std::vector<uint8_t> value = readVector(typeHeader.length); |
|
183 handlers->writeInteger(typeHeader, ASN1ContentHandler::Integer(value)); |
|
184 } else if (typeHeader.tag == UniversalType::ObjectIdentifier && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
185 std::vector<uint8_t> value(typeHeader.length, 0x00); |
|
186 read(value.data(), typeHeader.length); |
|
187 handlers->writeOID(typeHeader,{value}); |
|
188 } else if (typeHeader.tag == UniversalType::UTF8String && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
189 std::string s = readString(typeHeader.length); |
|
190 handlers->writeTextString(typeHeader, s); |
|
191 } else if (typeHeader.tag == UniversalType::PrintableString && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
192 // TODO: check encoding |
|
193 std::string s = readString(typeHeader.length); |
|
194 handlers->writeTextString(typeHeader, s); |
|
195 } else if (typeHeader.tag == UniversalType::OctetString && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
196 std::string s = readString(typeHeader.length); |
|
197 if (processEncapsulatedContent(typeHeader, s) == false) handlers->writeOctetString(typeHeader, s); |
|
198 } else if (typeHeader.tag == UniversalType::BitString && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
199 std::string s = readString(typeHeader.length); |
|
200 if (processEncapsulatedContent(typeHeader, s) == false) { |
|
201 std::vector<bool> bits; |
|
202 // TODO: throw exception on wrong padding or insufficient length? |
|
203 if (s.size() > 1) { |
|
204 uint8_t padding = s[0]; |
|
205 for (uint8_t j = padding; j < 8; j++) bits.push_back(s.back() & 1 << j); |
|
206 for (size_t i = s.size() - 2; i > 0; i--) for (uint8_t j = 0; j < 8; j++) bits.push_back(s[i] & 1 << j); |
|
207 } |
|
208 handlers->writeBitString(typeHeader, bits); |
|
209 } |
|
210 } else if (typeHeader.tag == UniversalType::UTCTime && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
211 // TODO: check encoding |
|
212 std::string s = readString(typeHeader.length); |
|
213 |
|
214 ASN1ContentHandler::DateTime dateTime; |
|
215 |
|
216 std::smatch match; |
|
217 if (std::regex_match(s, match, std::regex("([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})?(Z|([+-][0-9]{2})([0-9]{2}))"))) { |
|
218 int i = 1; |
|
219 uint32_t year = std::stoi(match[i++]); |
|
220 dateTime.year = year < 50 ? 2000 + year : 1900 + year; |
|
221 dateTime.month = std::stoi(match[i++]); |
|
222 dateTime.day = std::stoi(match[i++]); |
|
223 dateTime.hour = std::stoi(match[i++]); |
|
224 dateTime.minute = std::stoi(match[i++]); |
|
225 dateTime.precision = match[i].length() ? ASN1ContentHandler::DateTime::Precision::Second : ASN1ContentHandler::DateTime::Precision::Minute; |
|
226 dateTime.second = match[i].length() ? std::stoi(match[i]) : 0; |
|
227 i++; |
|
228 if (match[i++] != "Z") { |
|
229 dateTime.timezoneHour = std::stoi(match[i++]); |
|
230 dateTime.timezoneMinute = std::stoi(match[i++]); |
|
231 } |
|
232 handlers->writeDateTime(typeHeader, dateTime); |
|
233 } else { |
|
234 throw std::invalid_argument("Unsupported UTCTime format: " + s); // TODO: better exception |
|
235 } |
|
236 |
|
237 } else if (typeHeader.tag == UniversalType::GeneralizedTime && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) { |
|
238 std::string s = readString(typeHeader.length); |
|
239 |
|
240 ASN1ContentHandler::DateTime dateTime; |
|
241 |
|
242 std::smatch match; |
|
243 if (std::regex_match(s, match, std::regex("([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})(\\.([0-9]{1,3}))?(Z|([+-][0-9]{2})([0-9]{2}))"))) { |
|
244 // TODO: support also fractions of minutes and hours in GeneralizedTime |
|
245 int i = 1; |
|
246 dateTime.year = std::stoi(match[i++]); |
|
247 dateTime.month = std::stoi(match[i++]); |
|
248 dateTime.day = std::stoi(match[i++]); |
|
249 dateTime.hour = std::stoi(match[i++]); |
|
250 dateTime.minute = std::stoi(match[i++]); |
|
251 dateTime.second = match[i].length() ? std::stoi(match[i++]) : 0; |
|
252 dateTime.precision = match[i++].length() ? ASN1ContentHandler::DateTime::Precision::Nanosecond : ASN1ContentHandler::DateTime::Precision::Second; |
|
253 if (match[i].length() == 1) dateTime.nanosecond = std::stoi(match[i++]) * 100 * 1000000; |
|
254 else if (match[i].length() == 2) dateTime.nanosecond = std::stoi(match[i++]) * 10 * 1000000; |
|
255 else if (match[i].length() == 3) dateTime.nanosecond = std::stoi(match[i++]) * 1000000; |
|
256 else i++; |
|
257 if (match[i++] != "Z") { |
|
258 dateTime.timezoneHour = std::stoi(match[i++]); |
|
259 dateTime.timezoneMinute = std::stoi(match[i++]); |
|
260 } |
|
261 handlers->writeDateTime(typeHeader, dateTime); |
|
262 } else { |
|
263 throw std::invalid_argument("Unsupported GeneralizedTime format: " + s); // TODO: better exception |
|
264 } |
|
265 |
|
266 } else { |
|
267 // TODO: do not skip, parse |
|
268 std::string s = readString(typeHeader.length); |
|
269 handlers->writeSpecific(typeHeader, s); |
|
270 } |
|
271 |
|
272 commit(); |
|
273 } |
|
274 |
|
275 bool hasAvailableForReading() { |
|
276 // TODO: API in AbstractParser for checking available bytes? |
|
277 uint8_t tmp; |
|
278 try { |
|
279 peek(&tmp, 1); |
|
280 return true; |
|
281 } catch (...) { |
|
282 return false; |
|
283 } |
|
284 } |
|
285 |
|
286 bool isValidBER(const std::string& input) { |
|
287 BasicASN1Reader encapsulatedReader; |
|
288 std::shared_ptr<ValidatingASN1ContentHandler> validatingHandler = std::make_shared<ValidatingASN1ContentHandler>(); |
|
289 encapsulatedReader.addHandler(validatingHandler); |
|
290 try { |
|
291 encapsulatedReader.write((const uint8_t*) input.c_str(), input.size()); |
|
292 encapsulatedReader.close(); |
|
293 validatingHandler->finalCheck(); |
|
294 return true; |
|
295 } catch (...) { |
|
296 return false; |
|
297 } |
|
298 } |
|
299 |
|
300 class EncapsulatedASN1ContentHandler : public ASN1ContentHandlerProxy { |
|
301 public: |
|
302 |
|
303 void writeStreamStart() override { |
|
304 // skip this event |
|
305 } |
|
306 |
|
307 void writeStreamEnd() override { |
|
308 // skip this event |
|
309 } |
|
310 }; |
|
311 |
|
312 /** |
|
313 * @param typeHeader |
|
314 * @param input OCTET STRING or BIT STRING raw bytes |
|
315 * @return whether we found valid content and passed parsed results to handlers |
|
316 */ |
|
317 bool processEncapsulatedContent(const BasicHeader& typeHeader, const std::string& input) { |
|
318 // TODO: avoid double parsing + encapsulated content might be also processed at the XML/DOM level where we may even do conditional processing based on XPath (evaluate only certain octet- or bit- strings) |
|
319 // We may also do the same as with SEQUENCE or SET (continue nested reading in this ASN1Rreader instance), but it would require valid encapsulated data and would avoid easy fallback to raw OCTET or BIT STRING. We would also have to check the boundaries of the nested part. |
|
320 if (parseEncapsulated && isValidBER(input)) { |
|
321 handlers->writeCollectionStart(typeHeader); |
|
322 |
|
323 BasicASN1Reader encapsulatedReader; |
|
324 std::shared_ptr<EncapsulatedASN1ContentHandler> encapsulatedHandler = std::make_shared<EncapsulatedASN1ContentHandler>(); |
|
325 encapsulatedHandler->addHandler(handlers); |
|
326 encapsulatedReader.addHandler(encapsulatedHandler); |
|
327 |
|
328 encapsulatedReader.write((const uint8_t*) input.c_str(), input.size()); |
|
329 encapsulatedReader.close(); |
|
330 |
|
331 handlers->writeCollectionEnd(); |
|
332 return true; |
|
333 } else { |
|
334 return false; |
|
335 } |
|
336 } |
|
337 |
|
338 protected: |
|
339 |
|
340 void update() override { |
|
341 while (true) processNext(); |
|
342 } |
|
343 |
|
344 public: |
|
345 |
|
346 bool setOption(const std::string& uri, const std::string& value) override { |
|
347 if (uri == option::Encoding && value == encoding::ber); // currently, we support only BER (and thus also CER and DER) encoding, but options have no actual effect – we just validate them |
|
348 else if (uri == option::Encoding && value == encoding::cer); // in future versions, this might switch the parser into more strict mode |
|
349 else if (uri == option::Encoding && value == encoding::der); // in future versions, this might switch the parser into more strict mode |
|
350 else if (uri == option::Encoding && value == encoding::per) throw std::invalid_argument("PER encoding is not yet supported"); |
|
351 else if (uri == option::Encoding && value == encoding::xer) throw std::invalid_argument("XER encoding is not yet supported"); |
|
352 else if (uri == option::Encoding && value == encoding::asn1) throw std::invalid_argument("ASN.1 encoding is not yet supported"); |
|
353 else if (uri == option::Encoding) throw std::invalid_argument("Unsupported ASN.1 encoding: " + value); |
|
354 else if (uri == option::ParseEncapsulated) parseEncapsulated = parseBoolean(value); |
|
355 else return false; |
|
356 |
|
357 return true; |
|
358 } |
|
359 |
|
360 void close() override { |
|
361 if (hasAvailableForReading()) throw std::logic_error("Unexpected content at the end of the stream"); // TODO: better exception |
|
362 |
|
363 // TODO: check also open sequences etc.; maybe in the handler |
|
364 |
|
365 checkRemainingItems(); |
|
366 // TODO: check the bytes remaining in the buffer |
|
367 if (started) handlers->writeStreamEnd(); |
|
368 } |
|
369 |
|
370 }; |
|
371 |
|
372 } |
|
373 } |
|
374 } |
|
375 } |