src/lib/JavaPropertiesUnescapingINIHandler.h
branchv_0
changeset 28 0e7c57d48d1e
parent 27 fd669e73d39a
child 29 06aaad12c207
equal deleted inserted replaced
27:fd669e73d39a 28:0e7c57d48d1e
     1 /**
       
     2  * Relational pipes
       
     3  * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
       
     4  *
       
     5  * This program is free software: you can redistribute it and/or modify
       
     6  * it under the terms of the GNU General Public License as published by
       
     7  * the Free Software Foundation, version 3 of the License.
       
     8  *
       
     9  * This program is distributed in the hope that it will be useful,
       
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
       
    12  * GNU General Public License for more details.
       
    13  *
       
    14  * You should have received a copy of the GNU General Public License
       
    15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
       
    16  */
       
    17 #pragma once
       
    18 
       
    19 #include <sstream>
       
    20 #include <codecvt>
       
    21 #include <arpa/inet.h>
       
    22 
       
    23 #include "UnescapingINIHandler.h"
       
    24 
       
    25 using namespace std;
       
    26 using namespace relpipe::writer;
       
    27 
       
    28 namespace relpipe {
       
    29 namespace in {
       
    30 namespace ini {
       
    31 namespace lib {
       
    32 
       
    33 class JavaPropertiesUnescapingINIContentHandler : public UnescapingINIContentHandler {
       
    34 private:
       
    35 	wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
       
    36 
       
    37 	bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) {
       
    38 		if (hexLength != binLength * 2) return false;
       
    39 
       
    40 		for (size_t i = 0; i < binLength; i++) {
       
    41 			uint8_t value = 0;
       
    42 			char a = hexadecimal[i * 2];
       
    43 			char b = hexadecimal[i * 2 + 1];
       
    44 
       
    45 			if (a >= '0' && a <= '9') value += (a - '0')*16;
       
    46 			else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16;
       
    47 			else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16;
       
    48 			else return false;
       
    49 
       
    50 			if (b >= '0' && b <= '9') value += b - '0';
       
    51 			else if (b >= 'a' && b <= 'f') value += b - 'a' + 10;
       
    52 			else if (b >= 'A' && b <= 'F') value += b - 'A' + 10;
       
    53 			else return false;
       
    54 
       
    55 			if (resultBuffer) resultBuffer[i] = value;
       
    56 		}
       
    57 		return true;
       
    58 	}
       
    59 
       
    60 protected:
       
    61 
       
    62 	virtual std::string unescape(const std::string& s) {
       
    63 		std::stringstream result;
       
    64 		for (int i = 0, length = s.size(); i < length; i++) {
       
    65 			char ch = s[i];
       
    66 			if (i + 1 < length && ch == ESC) {
       
    67 				ch = s[i + 1];
       
    68 				if (ch == 'u') {
       
    69 					// TODO: simplify, clean-up, verify (but seems working)
       
    70 					i++;
       
    71 					int hexLength = 4;
       
    72 					if (i + hexLength < length) {
       
    73 						uint16_t u16;
       
    74 						bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16));
       
    75 						if (hexOK) result << convertor.to_bytes(ntohs(u16));
       
    76 						else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
       
    77 						i += hexLength;
       
    78 					} else {
       
    79 						throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
       
    80 					}
       
    81 
       
    82 				} else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
       
    83 				else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \.
       
    84 				else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
       
    85 				else result.put(ESC); // keep the escape sequence for later unescaping phase
       
    86 			} else if (ch == ESC) {
       
    87 				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
       
    88 			} else {
       
    89 				result.put(ch);
       
    90 			}
       
    91 		}
       
    92 		return result.str();
       
    93 	}
       
    94 
       
    95 public:
       
    96 
       
    97 	JavaPropertiesUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase, true) {
       
    98 	}
       
    99 
       
   100 };
       
   101 
       
   102 }
       
   103 }
       
   104 }
       
   105 }