src/lib/JavaPropertiesUnescapingProcessor.h
branchv_0
changeset 28 0e7c57d48d1e
parent 27 fd669e73d39a
equal deleted inserted replaced
27:fd669e73d39a 28:0e7c57d48d1e
       
     1 /**
       
     2  * Relational pipes
       
     3  * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
       
     4  *
       
     5  * This program is free software: you can redistribute it and/or modify
       
     6  * it under the terms of the GNU General Public License as published by
       
     7  * the Free Software Foundation, version 3 of the License.
       
     8  *
       
     9  * This program is distributed in the hope that it will be useful,
       
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
       
    12  * GNU General Public License for more details.
       
    13  *
       
    14  * You should have received a copy of the GNU General Public License
       
    15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
       
    16  */
       
    17 #pragma once
       
    18 
       
    19 #include <sstream>
       
    20 #include <codecvt>
       
    21 #include <arpa/inet.h>
       
    22 
       
    23 #include "UnescapingProcessor.h"
       
    24 
       
    25 using namespace std;
       
    26 using namespace relpipe::writer;
       
    27 
       
    28 namespace relpipe {
       
    29 namespace in {
       
    30 namespace ini {
       
    31 namespace lib {
       
    32 
       
    33 /**
       
    34  * Should work according to <https://docs.oracle.com/javase/specs/jls/se15/html/jls-3.html#jls-3.3> 3.3. Unicode Escapes 
       
    35  */
       
    36 class JavaPropertiesUnescapingProcessor : public UnescapingProcessor {
       
    37 private:
       
    38 	wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
       
    39 
       
    40 	bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) {
       
    41 		if (hexLength != binLength * 2) return false;
       
    42 
       
    43 		for (size_t i = 0; i < binLength; i++) {
       
    44 			uint8_t value = 0;
       
    45 			char a = hexadecimal[i * 2];
       
    46 			char b = hexadecimal[i * 2 + 1];
       
    47 
       
    48 			if (a >= '0' && a <= '9') value += (a - '0')*16;
       
    49 			else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16;
       
    50 			else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16;
       
    51 			else return false;
       
    52 
       
    53 			if (b >= '0' && b <= '9') value += b - '0';
       
    54 			else if (b >= 'a' && b <= 'f') value += b - 'a' + 10;
       
    55 			else if (b >= 'A' && b <= 'F') value += b - 'A' + 10;
       
    56 			else return false;
       
    57 
       
    58 			if (resultBuffer) resultBuffer[i] = value;
       
    59 		}
       
    60 		return true;
       
    61 	}
       
    62 
       
    63 public:
       
    64 	
       
    65 	std::string unescape(const std::string& s, const TextType type) override {
       
    66 		std::stringstream result;
       
    67 		for (int i = 0, length = s.size(); i < length; i++) {
       
    68 			char ch = s[i];
       
    69 			if (i + 1 < length && ch == ESC) {
       
    70 				ch = s[i + 1];
       
    71 				if (ch == 'u') {
       
    72 					// TODO: simplify, clean-up, verify (but seems working)
       
    73 					i++;
       
    74 					int hexLength = 4;
       
    75 					if (i + hexLength < length) {
       
    76 						uint16_t u16;
       
    77 						bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16));
       
    78 						if (hexOK) result << convertor.to_bytes(ntohs(u16));
       
    79 						else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
       
    80 						i += hexLength;
       
    81 					} else {
       
    82 						throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
       
    83 					}
       
    84 
       
    85 				} else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
       
    86 				else result.put(ESC); // keep the escape sequence for later unescaping phase
       
    87 			} else if (ch == ESC) {
       
    88 				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
       
    89 			} else {
       
    90 				result.put(ch);
       
    91 			}
       
    92 		}
       
    93 		return result.str();
       
    94 	}
       
    95 
       
    96 };
       
    97 
       
    98 }
       
    99 }
       
   100 }
       
   101 }