diff -r fd669e73d39a -r 0e7c57d48d1e src/lib/JavaPropertiesUnescapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/JavaPropertiesUnescapingProcessor.h Sat Nov 28 18:14:15 2020 +0100 @@ -0,0 +1,101 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include +#include + +#include "UnescapingProcessor.h" + +using namespace std; +using namespace relpipe::writer; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +/** + * Should work according to 3.3. Unicode Escapes + */ +class JavaPropertiesUnescapingProcessor : public UnescapingProcessor { +private: + wstring_convert < codecvt_utf8> convertor; // INI parser works with UTF-8 + + bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) { + if (hexLength != binLength * 2) return false; + + for (size_t i = 0; i < binLength; i++) { + uint8_t value = 0; + char a = hexadecimal[i * 2]; + char b = hexadecimal[i * 2 + 1]; + + if (a >= '0' && a <= '9') value += (a - '0')*16; + else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16; + else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16; + else return false; + + if (b >= '0' && b <= '9') value += b - '0'; + else if (b >= 'a' && b <= 'f') value += b - 'a' + 10; + else if (b >= 'A' && b <= 'F') value += b - 'A' + 10; + else return false; + + if (resultBuffer) resultBuffer[i] = value; + } + return true; + } + +public: + + std::string unescape(const std::string& s, const TextType type) override { + std::stringstream result; + for (int i = 0, length = s.size(); i < length; i++) { + char ch = s[i]; + if (i + 1 < length && ch == ESC) { + ch = s[i + 1]; + if (ch == 'u') { + // TODO: simplify, clean-up, verify (but seems working) + i++; + int hexLength = 4; + if (i + hexLength < length) { + uint16_t u16; + bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16)); + if (hexOK) result << convertor.to_bytes(ntohs(u16)); + else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX")); + i += hexLength; + } else { + throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters")); + } + + } else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle + else result.put(ESC); // keep the escape sequence for later unescaping phase + } else if (ch == ESC) { + throw std::logic_error(std::string("Missing escape sequence")); // this should not happen + } else { + result.put(ch); + } + } + return result.str(); + } + +}; + +} +} +} +}