# HG changeset patch
# User František Kučera
# Date 1606347524 -3600
# Node ID 4b1612d20cb2f76b73e497a9892a17e8e6442ee3
# Parent db994a2ddffab342cee6c5ff618afe9a54a29202
unescape also Java .properties encoding (\uXXXX): first version

diff -r db994a2ddffa -r 4b1612d20cb2 src/INICommand.cpp
--- a/src/INICommand.cpp	Wed Nov 25 21:35:07 2020 +0100
+++ b/src/INICommand.cpp	Thu Nov 26 00:38:44 2020 +0100
@@ -29,6 +29,7 @@
 #include "INICommand.h"
 #include "lib/INIReader.h"
 #include "lib/BasicUnescapingINIHandler.h"
+#include "lib/JavaPropertiesUnescapingINIHandler.h"
 
 using namespace std;
 using namespace relpipe::writer;
@@ -170,8 +171,11 @@
 	FlatINIContentHandler handler(writer, configuration);
 	std::shared_ptr<INIReader> reader(INIReader::create(input));
 	// TODO: configure the INIReader (features/properties) according to our Configuration (sub-keys etc.)
 	BasicUnescapingINIContentHandler unescapingHandler(handler, true);
-	reader->addHandler(&unescapingHandler);
+	// Events flow: reader -> javaHandler -> unescapingHandler -> handler,
+	// so the Java phase runs first and must not be the last escaping phase.
+	JavaPropertiesUnescapingINIContentHandler javaHandler(unescapingHandler, false);
+	reader->addHandler(&javaHandler);
 	reader->process();
 }
 
diff -r db994a2ddffa -r 4b1612d20cb2 src/lib/JavaPropertiesUnescapingINIHandler.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaPropertiesUnescapingINIHandler.h	Thu Nov 26 00:38:44 2020 +0100
@@ -0,0 +1,152 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <cstdint>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+#include "UnescapingINIHandler.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/**
+ * Unescaping phase that decodes Java .properties Unicode escapes (backslash, 'u', four hexadecimal digits) to UTF-8.
+ *
+ * Two consecutive escapes that form a UTF-16 surrogate pair are combined into a single code point.
+ * Other escape sequences are kept for a later phase or resolved/rejected here, depending on lastEscaphingPhase.
+ */
+class JavaPropertiesUnescapingINIContentHandler : public UnescapingINIContentHandler {
+private:
+	static constexpr int HEX_LENGTH = 4; // number of hexadecimal digits in one Unicode escape
+
+	/** @return numeric value of the hexadecimal digit or -1 if the character is not a hexadecimal digit */
+	static int hexValue(char c) {
+		if (c >= '0' && c <= '9') return c - '0';
+		if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+		if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+		return -1;
+	}
+
+	/**
+	 * Parses the four hexadecimal digits of one Unicode escape.
+	 *
+	 * @param s whole string being unescaped
+	 * @param first index of the first hexadecimal digit (the character just after the 'u')
+	 * @return the 16-bit value written in the escape
+	 * @throws std::logic_error if fewer than four characters are left or one of them is not a hexadecimal digit
+	 */
+	static uint32_t readCodeUnit(const std::string& s, int first) {
+		if (first + HEX_LENGTH > static_cast<int> (s.size())) throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
+		uint32_t value = 0;
+		for (int k = 0; k < HEX_LENGTH; k++) {
+			int digit = hexValue(s[first + k]);
+			if (digit < 0) throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
+			value = value * 16 + static_cast<uint32_t> (digit);
+		}
+		return value;
+	}
+
+	/** Writes the code point to the stream encoded as UTF-8 (the INI parser works with UTF-8). */
+	static void putUtf8(std::ostream& out, uint32_t codePoint) {
+		if (codePoint < 0x80) {
+			out.put(static_cast<char> (codePoint));
+		} else if (codePoint < 0x800) {
+			out.put(static_cast<char> (0xC0 | (codePoint >> 6)));
+			out.put(static_cast<char> (0x80 | (codePoint & 0x3F)));
+		} else if (codePoint < 0x10000) {
+			out.put(static_cast<char> (0xE0 | (codePoint >> 12)));
+			out.put(static_cast<char> (0x80 | ((codePoint >> 6) & 0x3F)));
+			out.put(static_cast<char> (0x80 | (codePoint & 0x3F)));
+		} else {
+			out.put(static_cast<char> (0xF0 | (codePoint >> 18)));
+			out.put(static_cast<char> (0x80 | ((codePoint >> 12) & 0x3F)));
+			out.put(static_cast<char> (0x80 | ((codePoint >> 6) & 0x3F)));
+			out.put(static_cast<char> (0x80 | (codePoint & 0x3F)));
+		}
+	}
+
+	/**
+	 * Decodes the escape (or the surrogate pair of two escapes) whose 'u' is at s[i] and appends it to the result.
+	 * On return, i is the index of the last consumed hexadecimal digit.
+	 */
+	void unescapeUnicode(std::stringstream& result, const std::string& s, int& i) {
+		uint32_t codePoint = readCodeUnit(s, i + 1);
+		i += HEX_LENGTH;
+		if (codePoint >= 0xD800 && codePoint <= 0xDBFF) {
+			// high surrogate: a character outside the BMP is written as two escapes, so the low surrogate must follow
+			bool nextIsEscape = i + 2 < static_cast<int> (s.size()) && s[i + 1] == ESC && s[i + 2] == 'u';
+			uint32_t low = nextIsEscape ? readCodeUnit(s, i + 3) : 0;
+			if (low < 0xDC00 || low > 0xDFFF) throw std::logic_error(std::string("Invalid unicode escape sequence: unpaired surrogate"));
+			codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
+			i += 2 + HEX_LENGTH;
+		} else if (codePoint >= 0xDC00 && codePoint <= 0xDFFF) {
+			throw std::logic_error(std::string("Invalid unicode escape sequence: unpaired surrogate"));
+		}
+		// a decoded backslash must stay escaped for the later phase, otherwise it would start a new escape sequence there
+		if (codePoint == static_cast<unsigned char> (ESC) && !lastEscaphingPhase) result.put(ESC);
+		putUtf8(result, codePoint);
+	}
+
+protected:
+
+	/**
+	 * @param s text that may contain escape sequences
+	 * @return text with Unicode escapes replaced by UTF-8; other escapes are kept (not the last phase) or resolved (last phase)
+	 * @throws std::logic_error on an invalid Unicode escape and, in the last phase, on an unsupported escape sequence
+	 */
+	virtual std::string unescape(const std::string& s) {
+		std::stringstream result;
+		// i is kept as int (not size_t) because it is also handed to the inherited put() helper, whose signature is not visible here
+		for (int i = 0, length = s.size(); i < length; i++) {
+			char ch = s[i];
+			if (i + 1 < length && ch == ESC) {
+				ch = s[i + 1];
+				if (ch == 'u') {
+					i++; // move to the 'u'
+					unescapeUnicode(result, s, i);
+				} else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
+				else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \.
+				else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
+				else result.put(ESC); // keep the escape sequence for later unescaping phase
+			} else if (ch == ESC) {
+				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
+			} else {
+				result.put(ch);
+			}
+		}
+		return result.str();
+	}
+
+public:
+
+	JavaPropertiesUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase) {
+	}
+
+};
+
+}
+}
+}
+}
diff -r db994a2ddffa -r 4b1612d20cb2 src/lib/UnescapingINIHandler.h
--- a/src/lib/UnescapingINIHandler.h	Wed Nov 25 21:35:07 2020 +0100
+++ b/src/lib/UnescapingINIHandler.h	Thu Nov 26 00:38:44 2020 +0100
@@ -84,6 +84,7 @@
 	}
 
 	void comment(const CommentEvent& event) override {
+		// TODO: optionally unescape also comments (e.g. Java .properties)
 		output.comment(event);
 	}
 