--- a/src/INICommand.cpp Wed Nov 25 21:35:07 2020 +0100
+++ b/src/INICommand.cpp Thu Nov 26 00:38:44 2020 +0100
@@ -29,6 +29,7 @@
#include "INICommand.h"
#include "lib/INIReader.h"
#include "lib/BasicUnescapingINIHandler.h"
+#include "lib/JavaPropertiesUnescapingINIHandler.h"
using namespace std;
using namespace relpipe::writer;
@@ -170,8 +171,9 @@
FlatINIContentHandler handler(writer, configuration);
std::shared_ptr<INIReader> reader(INIReader::create(input));
// TODO: configure the INIReader (features/properties) according to our Configuration (sub-keys etc.)
- BasicUnescapingINIContentHandler unescapingHandler(handler, true);
- reader->addHandler(&unescapingHandler);
+ BasicUnescapingINIContentHandler unescapingHandler(handler, false);
+ JavaPropertiesUnescapingINIContentHandler javaHandler(unescapingHandler, true);
+ reader->addHandler(&javaHandler);
reader->process();
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaPropertiesUnescapingINIHandler.h Thu Nov 26 00:38:44 2020 +0100
@@ -0,0 +1,105 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+#include <codecvt>
+#include <arpa/inet.h>
+
+#include "UnescapingINIHandler.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/**
+ * Unescapes the unicode sequences of Java .properties files: \uXXXX is replaced by the UTF-8 bytes of the character.
+ * A character outside the BMP, written as two consecutive \uXXXX UTF-16 surrogates, is decoded as a single character.
+ */
+class JavaPropertiesUnescapingINIContentHandler : public UnescapingINIContentHandler {
+private:
+	wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
+
+	/**
+	 * Parses the four hexadecimal digits of one \uXXXX sequence.
+	 * @param s the escaped text
+	 * @param position index of the first hexadecimal digit in s
+	 * @return value of the UTF-16 code unit (0x0000..0xFFFF)
+	 * @throws std::logic_error if s ends before the fourth digit or if a character is not a hexadecimal digit
+	 */
+	static uint32_t readCodeUnit(const std::string& s, size_t position) {
+		if (position + 4 > s.size()) throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
+		uint32_t unit = 0;
+		for (size_t i = position; i < position + 4; i++) {
+			char digit = s[i];
+			if (digit >= '0' && digit <= '9') unit = unit * 16 + (digit - '0');
+			else if (digit >= 'a' && digit <= 'f') unit = unit * 16 + (digit - 'a' + 10);
+			else if (digit >= 'A' && digit <= 'F') unit = unit * 16 + (digit - 'A' + 10);
+			else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
+		}
+		return unit;
+	}
+
+protected:
+
+	/**
+	 * @param s text that may contain escape sequences
+	 * @return s with each \uXXXX decoded; \\ becomes \ in the last phase, otherwise \\ and other sequences are copied for a later phase
+	 * @throws std::logic_error on malformed \uXXXX, unpaired surrogate, trailing escape character or (last phase only) an unsupported sequence
+	 */
+	virtual std::string unescape(const std::string& s) {
+		std::stringstream result;
+		for (int i = 0, length = s.size(); i < length; i++) {
+			char ch = s[i];
+			if (i + 1 < length && ch == ESC) {
+				ch = s[i + 1];
+				if (ch == 'u') {
+					uint32_t codePoint = readCodeUnit(s, i + 2);
+					i += 5; // move to the last digit; the loop increment then steps behind the sequence
+					if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
+						// surrogate: valid only as a high one immediately followed by a low one in another \uXXXX
+						bool paired = codePoint <= 0xDBFF && i + 2 < length && s[i + 1] == ESC && s[i + 2] == 'u';
+						uint32_t low = paired ? readCodeUnit(s, i + 3) : 0;
+						if (low < 0xDC00 || low > 0xDFFF) throw std::logic_error(std::string("Invalid unicode escape sequence: unpaired surrogate"));
+						codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
+						i += 6; // skip the second \uXXXX as well
+					}
+					result << convertor.to_bytes(static_cast<wchar_t> (codePoint)); // assumes wchar_t can hold any code point (32-bit)
+				} else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
+				else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \.
+				else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
+				else result.put(ESC); // keep the escape sequence for later unescaping phase
+			} else if (ch == ESC) throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
+			else result.put(ch);
+		}
+		return result.str();
+	}
+
+public:
+
+	JavaPropertiesUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase) {
+	}
+};
+
+}
+}
+}
+}
--- a/src/lib/UnescapingINIHandler.h Wed Nov 25 21:35:07 2020 +0100
+++ b/src/lib/UnescapingINIHandler.h Thu Nov 26 00:38:44 2020 +0100
@@ -84,6 +84,7 @@
}
void comment(const CommentEvent& event) override {
+ // TODO: optionally unescape also comments (e.g. Java .properties)
output.comment(event);
}