src/lib/BasicASN1Reader.h
branchv_0
changeset 1 68a281aefa76
equal deleted inserted replaced
0:28294b895e5e 1:68a281aefa76
       
     1 /**
       
     2  * Relational pipes
       
     3  * Copyright © 2021 František Kučera (Frantovo.cz, GlobalCode.info)
       
     4  *
       
     5  * This program is free software: you can redistribute it and/or modify
       
     6  * it under the terms of the GNU General Public License as published by
       
     7  * the Free Software Foundation, version 3 of the License.
       
     8  *
       
     9  * This program is distributed in the hope that it will be useful,
       
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
       
    12  * GNU General Public License for more details.
       
    13  *
       
    14  * You should have received a copy of the GNU General Public License
       
    15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
       
    16  */
       
    17 #pragma once
       
    18 
       
    19 #include <memory>
       
    20 #include <vector>
       
    21 #include <array>
       
    22 #include <sstream>
       
    23 #include <regex>
       
    24 
       
    25 #include "ASN1Reader.h"
       
    26 #include "ValidatingASN1ContentHandler.h"
       
    27 #include "uri.h"
       
    28 
       
    29 namespace relpipe {
       
    30 namespace in {
       
    31 namespace asn1 {
       
    32 namespace lib {
       
    33 
       
    34 /**
       
    35  * Reads ASN.1 data encoded as BER (DER, CER).
       
    36  */
       
    37 class BasicASN1Reader : public ASN1Reader {
       
    38 private:
       
    39 
       
    40 	bool started = false;
       
    41 
       
    42 	bool parseEncapsulated = true;
       
    43 
       
    44 	/**
       
    45 	 * TODO: use a common method
       
    46 	 */
       
    47 	bool parseBoolean(const std::string& value) {
       
    48 		if (value == "true") return true;
       
    49 		else if (value == "false") return false;
       
    50 		else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
       
    51 	}
       
    52 
       
    53 	class BasicHeader : public ASN1ContentHandler::Header {
       
    54 	public:
       
    55 		bool definiteLength;
       
    56 		size_t length;
       
    57 	};
       
    58 
       
    59 	class LevelMetadata {
       
    60 	public:
       
    61 		bool definiteLength;
       
    62 		size_t length;
       
    63 		size_t start;
       
    64 	};
       
    65 
       
    66 	std::vector<LevelMetadata> level;
       
    67 
       
    68 	void checkRemainingItems() {
       
    69 		if (level.size()) {
       
    70 			LevelMetadata& l = level.back();
       
    71 			if (l.definiteLength && l.length == getBytesRead() - l.start) {
       
    72 				level.pop_back();
       
    73 				handlers->writeCollectionEnd();
       
    74 				checkRemainingItems(); // multiple collections may end at the same point
       
    75 			}
       
    76 		}
       
    77 	}
       
    78 
       
    79 	BasicHeader readHeader() {
       
    80 		using TagClass = ASN1ContentHandler::TagClass;
       
    81 		using PC = ASN1ContentHandler::PC;
       
    82 
       
    83 		BasicHeader h;
       
    84 
       
    85 		memset(&h, 0, sizeof (h)); // TODO: remove, not needed
       
    86 
       
    87 		uint8_t tagByte;
       
    88 		read(&tagByte, 1);
       
    89 
       
    90 		h.tagClass = (TagClass) (tagByte >> 6);
       
    91 		h.pc = (PC) ((tagByte >> 5) & 1);
       
    92 		h.tag = tagByte & (0xFF >> 3);
       
    93 		if (h.tag == 31) { // all five tag bits are set → tag number (greater than 30) is encoded in following octets
       
    94 			h.tag = 0;
       
    95 			uint8_t moreTag = 0;
       
    96 			do {
       
    97 				read(&moreTag, 1);
       
    98 				h.tag = h.tag << 7 | (moreTag & (0xFF >> 1));
       
    99 			} while (moreTag & (1 << 7));
       
   100 		}
       
   101 
       
   102 		uint8_t lengthByte;
       
   103 		read(&lengthByte, 1);
       
   104 
       
   105 		if (lengthByte >> 7 == 0) {
       
   106 			// definite short
       
   107 			h.definiteLength = true;
       
   108 			h.length = lengthByte;
       
   109 		} else if (lengthByte == 0b10000000) {
       
   110 			// indefinite
       
   111 			h.definiteLength = false;
       
   112 			h.length = 0;
       
   113 		} else if (lengthByte == 0xFF) {
       
   114 			throw relpipe::writer::RelpipeWriterException(L"ASN.1 lengthByte == 0xFF (reserved value)"); // TODO: better exception
       
   115 		} else {
       
   116 			// definite long
       
   117 			h.definiteLength = true;
       
   118 			h.length = 0;
       
   119 			std::vector<uint8_t> lengthBytes(lengthByte & 0b01111111, 0);
       
   120 			read(lengthBytes.data(), lengthBytes.size());
       
   121 			for (uint8_t l : lengthBytes) h.length = (h.length << 8) + l;
       
   122 		}
       
   123 
       
   124 		return h;
       
   125 	}
       
   126 
       
   127 	const std::string readString(size_t length) {
       
   128 		std::string result;
       
   129 
       
   130 		for (size_t remaining = length; remaining;) {
       
   131 			size_t current = std::min(remaining, (size_t) 3);
       
   132 			result.resize(result.size() + current);
       
   133 			read((uint8_t*) result.data() + result.size() - current, current);
       
   134 			remaining -= current;
       
   135 		}
       
   136 
       
   137 		return result;
       
   138 	}
       
   139 
       
   140 	const std::vector<uint8_t> readVector(size_t length) {
       
   141 		std::vector<uint8_t> result;
       
   142 		std::string s = readString(length); // TODO: read directly to the vector
       
   143 		result.resize(length);
       
   144 		for (size_t i = 0; i < length; i++) result[i] = (uint8_t) s[i];
       
   145 		return result;
       
   146 	}
       
   147 
       
   148 	void processNext() {
       
   149 		using TagClass = ASN1ContentHandler::TagClass;
       
   150 		using PC = ASN1ContentHandler::PC;
       
   151 
       
   152 		checkRemainingItems();
       
   153 		BasicHeader typeHeader = readHeader();
       
   154 		// commit(); // TODO: commit here and recover later instead of rollback?
       
   155 
       
   156 		if (!started) {
       
   157 			handlers->writeStreamStart();
       
   158 			started = true;
       
   159 		}
       
   160 
       
   161 		// TODO: check tagClass and pc
       
   162 
       
   163 		// TODO: constants, more types
       
   164 		if (typeHeader.tag == UniversalType::EndOfContent && typeHeader.tagClass == TagClass::Universal && typeHeader.pc == PC::Primitive) {
       
   165 			handlers->writeCollectionEnd();
       
   166 		} else if (typeHeader.tag == UniversalType::Sequence) {
       
   167 			level.push_back({typeHeader.definiteLength, typeHeader.length, getBytesRead()}); // TODO: transaction
       
   168 			handlers->writeCollectionStart(typeHeader);
       
   169 		} else if (typeHeader.tag == UniversalType::Set) {
       
   170 			level.push_back({typeHeader.definiteLength, typeHeader.length, getBytesRead()}); // TODO: transaction
       
   171 			handlers->writeCollectionStart(typeHeader);
       
   172 		} else if (typeHeader.pc == PC::Constructed) {
       
   173 			level.push_back({typeHeader.definiteLength, typeHeader.length, getBytesRead()}); // TODO: transaction
       
   174 			handlers->writeCollectionStart(typeHeader);
       
   175 		} else if (typeHeader.tag == UniversalType::Null && typeHeader.length == 0) {
       
   176 			handlers->writeNull(typeHeader);
       
   177 		} else if (typeHeader.tag == UniversalType::Boolean && typeHeader.definiteLength && typeHeader.length == 1) {
       
   178 			bool value;
       
   179 			read((uint8_t*) & value, 1);
       
   180 			handlers->writeBoolean(typeHeader, value);
       
   181 		} else if (typeHeader.tag == UniversalType::Integer && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   182 			std::vector<uint8_t> value = readVector(typeHeader.length);
       
   183 			handlers->writeInteger(typeHeader, ASN1ContentHandler::Integer(value));
       
   184 		} else if (typeHeader.tag == UniversalType::ObjectIdentifier && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   185 			std::vector<uint8_t> value(typeHeader.length, 0x00);
       
   186 			read(value.data(), typeHeader.length);
       
   187 			handlers->writeOID(typeHeader,{value});
       
   188 		} else if (typeHeader.tag == UniversalType::UTF8String && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   189 			std::string s = readString(typeHeader.length);
       
   190 			handlers->writeTextString(typeHeader, s);
       
   191 		} else if (typeHeader.tag == UniversalType::PrintableString && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   192 			// TODO: check encoding
       
   193 			std::string s = readString(typeHeader.length);
       
   194 			handlers->writeTextString(typeHeader, s);
       
   195 		} else if (typeHeader.tag == UniversalType::OctetString && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   196 			std::string s = readString(typeHeader.length);
       
   197 			if (processEncapsulatedContent(typeHeader, s) == false) handlers->writeOctetString(typeHeader, s);
       
   198 		} else if (typeHeader.tag == UniversalType::BitString && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   199 			std::string s = readString(typeHeader.length);
       
   200 			if (processEncapsulatedContent(typeHeader, s) == false) {
       
   201 				std::vector<bool> bits;
       
   202 				// TODO: throw exception on wrong padding or insufficient length?
       
   203 				if (s.size() > 1) {
       
   204 					uint8_t padding = s[0];
       
   205 					for (uint8_t j = padding; j < 8; j++) bits.push_back(s.back() & 1 << j);
       
   206 					for (size_t i = s.size() - 2; i > 0; i--) for (uint8_t j = 0; j < 8; j++) bits.push_back(s[i] & 1 << j);
       
   207 				}
       
   208 				handlers->writeBitString(typeHeader, bits);
       
   209 			}
       
   210 		} else if (typeHeader.tag == UniversalType::UTCTime && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   211 			// TODO: check encoding
       
   212 			std::string s = readString(typeHeader.length);
       
   213 
       
   214 			ASN1ContentHandler::DateTime dateTime;
       
   215 
       
   216 			std::smatch match;
       
   217 			if (std::regex_match(s, match, std::regex("([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})?(Z|([+-][0-9]{2})([0-9]{2}))"))) {
       
   218 				int i = 1;
       
   219 				uint32_t year = std::stoi(match[i++]);
       
   220 				dateTime.year = year < 50 ? 2000 + year : 1900 + year;
       
   221 				dateTime.month = std::stoi(match[i++]);
       
   222 				dateTime.day = std::stoi(match[i++]);
       
   223 				dateTime.hour = std::stoi(match[i++]);
       
   224 				dateTime.minute = std::stoi(match[i++]);
       
   225 				dateTime.precision = match[i].length() ? ASN1ContentHandler::DateTime::Precision::Second : ASN1ContentHandler::DateTime::Precision::Minute;
       
   226 				dateTime.second = match[i].length() ? std::stoi(match[i]) : 0;
       
   227 				i++;
       
   228 				if (match[i++] != "Z") {
       
   229 					dateTime.timezoneHour = std::stoi(match[i++]);
       
   230 					dateTime.timezoneMinute = std::stoi(match[i++]);
       
   231 				}
       
   232 				handlers->writeDateTime(typeHeader, dateTime);
       
   233 			} else {
       
   234 				throw std::invalid_argument("Unsupported UTCTime format: " + s); // TODO: better exception
       
   235 			}
       
   236 
       
   237 		} else if (typeHeader.tag == UniversalType::GeneralizedTime && typeHeader.tagClass == TagClass::Universal && typeHeader.definiteLength) {
       
   238 			std::string s = readString(typeHeader.length);
       
   239 
       
   240 			ASN1ContentHandler::DateTime dateTime;
       
   241 
       
   242 			std::smatch match;
       
   243 			if (std::regex_match(s, match, std::regex("([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})(\\.([0-9]{1,3}))?(Z|([+-][0-9]{2})([0-9]{2}))"))) {
       
   244 				// TODO: support also fractions of minutes and hours in GeneralizedTime
       
   245 				int i = 1;
       
   246 				dateTime.year = std::stoi(match[i++]);
       
   247 				dateTime.month = std::stoi(match[i++]);
       
   248 				dateTime.day = std::stoi(match[i++]);
       
   249 				dateTime.hour = std::stoi(match[i++]);
       
   250 				dateTime.minute = std::stoi(match[i++]);
       
   251 				dateTime.second = match[i].length() ? std::stoi(match[i++]) : 0;
       
   252 				dateTime.precision = match[i++].length() ? ASN1ContentHandler::DateTime::Precision::Nanosecond : ASN1ContentHandler::DateTime::Precision::Second;
       
   253 				if (match[i].length() == 1) dateTime.nanosecond = std::stoi(match[i++]) * 100 * 1000000;
       
   254 				else if (match[i].length() == 2) dateTime.nanosecond = std::stoi(match[i++]) * 10 * 1000000;
       
   255 				else if (match[i].length() == 3) dateTime.nanosecond = std::stoi(match[i++]) * 1000000;
       
   256 				else i++;
       
   257 				if (match[i++] != "Z") {
       
   258 					dateTime.timezoneHour = std::stoi(match[i++]);
       
   259 					dateTime.timezoneMinute = std::stoi(match[i++]);
       
   260 				}
       
   261 				handlers->writeDateTime(typeHeader, dateTime);
       
   262 			} else {
       
   263 				throw std::invalid_argument("Unsupported GeneralizedTime format: " + s); // TODO: better exception
       
   264 			}
       
   265 
       
   266 		} else {
       
   267 			// TODO: do not skip, parse
       
   268 			std::string s = readString(typeHeader.length);
       
   269 			handlers->writeSpecific(typeHeader, s);
       
   270 		}
       
   271 
       
   272 		commit();
       
   273 	}
       
   274 
       
   275 	bool hasAvailableForReading() {
       
   276 		// TODO: API in AbstractParser for checking available bytes?
       
   277 		uint8_t tmp;
       
   278 		try {
       
   279 			peek(&tmp, 1);
       
   280 			return true;
       
   281 		} catch (...) {
       
   282 			return false;
       
   283 		}
       
   284 	}
       
   285 
       
   286 	bool isValidBER(const std::string& input) {
       
   287 		BasicASN1Reader encapsulatedReader;
       
   288 		std::shared_ptr<ValidatingASN1ContentHandler> validatingHandler = std::make_shared<ValidatingASN1ContentHandler>();
       
   289 		encapsulatedReader.addHandler(validatingHandler);
       
   290 		try {
       
   291 			encapsulatedReader.write((const uint8_t*) input.c_str(), input.size());
       
   292 			encapsulatedReader.close();
       
   293 			validatingHandler->finalCheck();
       
   294 			return true;
       
   295 		} catch (...) {
       
   296 			return false;
       
   297 		}
       
   298 	}
       
   299 
       
   300 	class EncapsulatedASN1ContentHandler : public ASN1ContentHandlerProxy {
       
   301 	public:
       
   302 
       
   303 		void writeStreamStart() override {
       
   304 			// skip this event
       
   305 		}
       
   306 
       
   307 		void writeStreamEnd() override {
       
   308 			// skip this event
       
   309 		}
       
   310 	};
       
   311 
       
   312 	/**
       
   313 	 * @param typeHeader
       
   314 	 * @param input OCTET STRING or BIT STRING raw bytes
       
   315 	 * @return whether we found valid content and passed parsed results to handlers
       
   316 	 */
       
   317 	bool processEncapsulatedContent(const BasicHeader& typeHeader, const std::string& input) {
       
   318 		// TODO: avoid double parsing + encapsulated content might be also processed at the XML/DOM level where we may even do conditional processing based on XPath (evaluate only certain octet- or bit- strings)
       
   319 		// We may also do the same as with SEQUENCE or SET (continue nested reading in this ASN1Rreader instance), but it would require valid encapsulated data and would avoid easy fallback to raw OCTET or BIT STRING. We would also have to check the boundaries of the nested part.
       
   320 		if (parseEncapsulated && isValidBER(input)) {
       
   321 			handlers->writeCollectionStart(typeHeader);
       
   322 
       
   323 			BasicASN1Reader encapsulatedReader;
       
   324 			std::shared_ptr<EncapsulatedASN1ContentHandler> encapsulatedHandler = std::make_shared<EncapsulatedASN1ContentHandler>();
       
   325 			encapsulatedHandler->addHandler(handlers);
       
   326 			encapsulatedReader.addHandler(encapsulatedHandler);
       
   327 
       
   328 			encapsulatedReader.write((const uint8_t*) input.c_str(), input.size());
       
   329 			encapsulatedReader.close();
       
   330 
       
   331 			handlers->writeCollectionEnd();
       
   332 			return true;
       
   333 		} else {
       
   334 			return false;
       
   335 		}
       
   336 	}
       
   337 
       
   338 protected:
       
   339 
       
   340 	void update() override {
       
   341 		while (true) processNext();
       
   342 	}
       
   343 
       
   344 public:
       
   345 
       
   346 	bool setOption(const std::string& uri, const std::string& value) override {
       
   347 		if (uri == option::Encoding && value == encoding::ber); // currently, we support only BER (and thus also CER and DER) encoding, but options have no actual effect – we just validate them
       
   348 		else if (uri == option::Encoding && value == encoding::cer); // in future versions, this might switch the parser into more strict mode
       
   349 		else if (uri == option::Encoding && value == encoding::der); // in future versions, this might switch the parser into more strict mode
       
   350 		else if (uri == option::Encoding && value == encoding::per) throw std::invalid_argument("PER encoding is not yet supported");
       
   351 		else if (uri == option::Encoding && value == encoding::xer) throw std::invalid_argument("XER encoding is not yet supported");
       
   352 		else if (uri == option::Encoding && value == encoding::asn1) throw std::invalid_argument("ASN.1 encoding is not yet supported");
       
   353 		else if (uri == option::Encoding) throw std::invalid_argument("Unsupported ASN.1 encoding: " + value);
       
   354 		else if (uri == option::ParseEncapsulated) parseEncapsulated = parseBoolean(value);
       
   355 		else return false;
       
   356 
       
   357 		return true;
       
   358 	}
       
   359 
       
   360 	void close() override {
       
   361 		if (hasAvailableForReading()) throw std::logic_error("Unexpected content at the end of the stream"); // TODO: better exception
       
   362 
       
   363 		// TODO: check also open sequences etc.; maybe in the handler
       
   364 
       
   365 		checkRemainingItems();
       
   366 		// TODO: check the bytes remaining in the buffer
       
   367 		if (started) handlers->writeStreamEnd();
       
   368 	}
       
   369 
       
   370 };
       
   371 
       
   372 }
       
   373 }
       
   374 }
       
   375 }