configurable unescaping processors v_0
author František Kučera <franta-hg@frantovo.cz>
Sat, 28 Nov 2020 18:10:47 +0100
branch v_0
changeset 27 e9aad9dd823a
parent 26 b1f6fa3a6555
child 28 596a724fbb83
configurable unescaping processors
nbproject/configurations.xml
src/INICommand.cpp
src/lib/BackspaceUnescapingProcessor.h
src/lib/BasicUnescapingINIHandler.h
src/lib/BasicUnescapingProcessor.h
src/lib/INIReader.cpp
src/lib/INIReader.h
src/lib/JavaPropertiesUnescapingINIHandler.h
src/lib/JavaPropertiesUnescapingProcessor.h
src/lib/UnescapingINIHandler.h
src/lib/UnescapingProcessor.h
--- a/nbproject/configurations.xml	Sat Nov 28 00:46:40 2020 +0100
+++ b/nbproject/configurations.xml	Sat Nov 28 18:10:47 2020 +0100
@@ -46,7 +46,7 @@
           <in>INIContentHandler.h</in>
           <in>INIReader.cpp</in>
           <in>INIReader.h</in>
-          <in>UnescapingINIHandler.h</in>
+          <in>UnescapingProcessor.h</in>
         </df>
         <in>INICommand.cpp</in>
         <in>relpipe-in-ini.cpp</in>
@@ -147,7 +147,7 @@
       </item>
       <item path="src/lib/INIReader.h" ex="false" tool="3" flavor2="0">
       </item>
-      <item path="src/lib/UnescapingINIHandler.h" ex="false" tool="3" flavor2="0">
+      <item path="src/lib/UnescapingProcessor.h" ex="false" tool="3" flavor2="0">
       </item>
     </conf>
   </confs>
--- a/src/INICommand.cpp	Sat Nov 28 00:46:40 2020 +0100
+++ b/src/INICommand.cpp	Sat Nov 28 18:10:47 2020 +0100
@@ -28,8 +28,9 @@
 
 #include "INICommand.h"
 #include "lib/INIReader.h"
-#include "lib/BasicUnescapingINIHandler.h"
-#include "lib/JavaPropertiesUnescapingINIHandler.h"
+#include "lib/BasicUnescapingProcessor.h"
+#include "lib/BackspaceUnescapingProcessor.h"
+#include "lib/JavaPropertiesUnescapingProcessor.h"
 
 using namespace std;
 using namespace relpipe::writer;
@@ -170,10 +171,13 @@
 void INICommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
 	FlatINIContentHandler handler(writer, configuration);
 	std::shared_ptr<INIReader> reader(INIReader::create(input));
+	reader->addUnescapingProcessor(std::make_shared<BasicUnescapingProcessor>(), "unescape-basic", true);
+	reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), "unescape-java-properties", false);
+	reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(false), "unescape-backspace-disorder", false);
+	reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), "unescape-backspace", true);
+	reader->addHandler(&handler);
+	// TODO: smart pointers vs. references: are we going to call addUnescapingProcessor() dynamically/conditionally or share instances? Then pointers will be better.
 	for (ParserOptionRecipe option : configuration.parserOptions) reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value));
-	BasicUnescapingINIContentHandler unescapingHandler(handler, false);
-	JavaPropertiesUnescapingINIContentHandler javaHandler(unescapingHandler, true);
-	reader->addHandler(&javaHandler);
 	reader->process();
 
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/BackspaceUnescapingProcessor.h	Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,70 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include "UnescapingProcessor.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+class BackspaceUnescapingProcessor : public UnescapingProcessor { // NOTE: despite the "Backspace" in the name, this stage handles the backslash escape character (ESC)
+private:
+	const bool lastEscaphingPhase = true; // true = unrecognized escape sequences are rejected instead of being passed on
+public:
+
+	std::string unescape(const std::string& s, const TextType type) override { // collapses each escaped backslash pair into a single backslash; "type" is not consulted here
+		std::stringstream result;
+		for (int i = 0, length = s.size(); i < length; i++) {
+			char ch = s[i];
+			if (i + 1 < length && ch == ESC) { // ESC followed by at least one more character
+				ch = s[i + 1]; // look at the escaped character
+				if (ch == ESC) put(result, ESC, i); // unescape \\ to \ (put() also advances i, so the second backslash is skipped)
+				else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
+				else result.put(ESC); // keep the escape sequence for later unescaping phase (the escaped character itself is copied by the next iteration)
+			} else if (ch == ESC) { // ESC is the very last character of the text
+				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
+			} else {
+				result.put(ch);
+			}
+		}
+		return result.str();
+	}
+
+	/**
+	 * @param lastEscaphingPhase whether this is the final unescaping stage.
+	 * By default it is set to true, thus no unrecognized escape sequences may be left after this stage (they cause an exception).
+	 * Setting this to false is dangerous and may lead to errors and ambiguous behavior.
+	 * It should be used only as a last resort,
+	 * because both "\\ \xxx" and "\ \xxx" will be converted to "\ \xxx" and the information will be lost.
+	 * So, it is usually better to keep the "\" escaped as "\\" and process both the escaped backslashes and unrecognized escape sequences later.
+	 */
+	BackspaceUnescapingProcessor(bool lastEscaphingPhase = true) : lastEscaphingPhase(lastEscaphingPhase) {
+	}
+
+};
+
+}
+}
+}
+}
--- a/src/lib/BasicUnescapingINIHandler.h	Sat Nov 28 00:46:40 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-/**
- * Relational pipes
- * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#include <sstream>
-
-#include "UnescapingINIHandler.h"
-
-using namespace std;
-using namespace relpipe::writer;
-
-namespace relpipe {
-namespace in {
-namespace ini {
-namespace lib {
-
-class BasicUnescapingINIContentHandler : public UnescapingINIContentHandler {
-protected:
-
-	virtual std::string unescape(const std::string& s) {
-		std::stringstream result;
-		for (int i = 0, length = s.size(); i < length; i++) {
-			char ch = s[i];
-			if (i + 1 < length && ch == ESC) {
-				ch = s[i + 1];
-				if (ch == 'n') put(result, '\n', i);
-				else if (ch == 'r') put(result, '\r', i);
-				else if (ch == 't') put(result, '\t', i);
-				else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what is „basic“ escaping and should be supported.
-				else if (ch == '"') put(result, ch, i); //        The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to given environment is unescaped, e.g. \" in "quoted" value).
-				else if (ch == '\'') put(result, ch, i); //       So it does not necessary to do it here. But someone might write a="xxx\'zzz" however it is superfluous because a="xxx'zzz" will also work.
-				else if (ch == ']') put(result, ch, i);
-				else if (ch == ':') put(result, ch, i);
-				else if (ch == ';') put(result, ch, i);
-				else if (ch == '#') put(result, ch, i);
-				else if (ch == '=') put(result, ch, i);
-				else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
-				else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \.
-				else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
-				else result.put(ESC); // keep the escape sequence for later unescaping phase
-			} else if (ch == ESC) {
-				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
-			} else {
-				result.put(ch);
-			}
-		}
-		return result.str();
-	}
-
-public:
-
-	BasicUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase) {
-	}
-
-};
-
-}
-}
-}
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/BasicUnescapingProcessor.h	Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,67 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include "UnescapingProcessor.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+class BasicUnescapingProcessor : public UnescapingProcessor { // intermediate stage: decodes a fixed set of simple escapes and passes everything else through untouched
+public:
+
+	std::string unescape(const std::string& s, const TextType type) override { // "type" is not consulted here: every kind of text is treated the same way
+		std::stringstream result;
+		for (int i = 0, length = s.size(); i < length; i++) {
+			char ch = s[i];
+			if (i + 1 < length && ch == ESC) { // ESC followed by at least one more character
+				ch = s[i + 1]; // look at the escaped character; put() below writes the replacement and advances i past it
+				if (ch == 'n') put(result, '\n', i);
+				else if (ch == 'r') put(result, '\r', i);
+				else if (ch == 't') put(result, '\t', i);
+				else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what is „basic“ escaping and should be supported.
+				else if (ch == '"') put(result, ch, i); // The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to the given environment is unescaped, e.g. \" in a "quoted" value).
+				else if (ch == '\'') put(result, ch, i); // So it is not necessary to do it here. But someone might write a="xxx\'zzz", although it is superfluous because a="xxx'zzz" will also work.
+				else if (ch == ']') put(result, ch, i);
+				else if (ch == ':') put(result, ch, i);
+				else if (ch == ';') put(result, ch, i);
+				else if (ch == '#') put(result, ch, i);
+				else if (ch == '=') put(result, ch, i);
+				else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
+				else result.put(ESC); // keep the escape sequence for later unescaping phase (the escaped character itself is copied by the next iteration)
+			} else if (ch == ESC) { // ESC is the very last character of the text
+				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
+			} else {
+				result.put(ch);
+			}
+		}
+		return result.str();
+	}
+	
+};
+
+}
+}
+}
+}
--- a/src/lib/INIReader.cpp	Sat Nov 28 00:46:40 2020 +0100
+++ b/src/lib/INIReader.cpp	Sat Nov 28 18:10:47 2020 +0100
@@ -32,6 +32,19 @@
 	std::istream& input;
 	std::vector<INIContentHandler*> handlers;
 
+	class ConfiguredUnescapingProcessor { // one registered unescaping stage together with its configuration
+	public:
+		std::shared_ptr<UnescapingProcessor> processor; // the stage itself
+		const std::string uri; // parser option name that switches this stage on or off
+		bool enbaled; // (sic) whether the stage is currently applied
+
+		ConfiguredUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enbaled) : processor(processor), uri(uri), enbaled(enbaled) {
+		}
+
+	};
+
+	std::vector<ConfiguredUnescapingProcessor> unescapingProcessors;
+
 	/** 
 	 * By default, we ignore all leading whitespace on continuing lines.
 	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
@@ -221,6 +234,12 @@
 		return result;
 	}
 
+	std::string unescape(const std::string& value, UnescapingProcessor::TextType type) { // runs the value through every enabled processor, in registration order
+		std::string result = value;
+		for (const ConfiguredUnescapingProcessor& p : unescapingProcessors) if (p.enbaled) result = p.processor->unescape(result, type); // by reference: no shared_ptr/URI copy per processor and per value
+		return result;
+	}
+
 	bool isComment(char ch) {
 		return oneOf(ch, commentSeparators);
 	}
@@ -268,6 +287,16 @@
 		}
 	}
 
+	bool setUnescaping(const std::string& uri, const std::string& value) { // returns true when the option URI belongs to a registered processor
+		for (auto& candidate : unescapingProcessors) {
+			if (candidate.uri != uri) continue; // not the processor this option addresses
+			candidate.enbaled = parseBoolean(value);
+			return true;
+		}
+		// no registered processor claims this URI; the caller decides what to do with the option
+		return false;
+	}
+
 public:
 
 	INIReaderImpl(std::istream& input) : input(input) {
@@ -282,6 +311,7 @@
 		else if (uri == "key-value-separators") keyValueSeparators = value;
 		else if (uri == "quotes") quotes = value;
 		else if (uri == "dialect") setDialect(value);
+		else if (setUnescaping(uri, value));
 		else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
 	}
 
@@ -289,6 +319,10 @@
 		handlers.push_back(handler);
 	}
 
+	void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) override {
+		unescapingProcessors.emplace_back(processor, uri, enabledByDefault); // appended at the end: processors are applied in registration order
+	}
+
 	void process() override {
 		for (INIContentHandler* handler : handlers) handler->startDocument();
 
@@ -323,11 +357,13 @@
 				readAllWhitespace();
 				event.name = readTokenAndEatTerminator(']', &quote, &found);
 				if (!quote) event.name = trim(event.name);
+				event.name = unescape(event.name, UnescapingProcessor::TextType::SectionName);
 
 				readSpacesAndTabs();
 				if (allowSectionTags && peek() == '[') {
 					get();
 					event.tag = readTokenAndEatTerminator(']', &quote, &found);
+					event.tag = unescape(event.tag, UnescapingProcessor::TextType::SectionTag);
 				}
 
 				readSpacesAndTabs();
@@ -336,6 +372,7 @@
 					get();
 					readSpacesAndTabs();
 					event.comment = readUntil('\n', &found);
+					event.comment = unescape(event.comment, UnescapingProcessor::TextType::SectionComment);
 				} else if (ch == '\n') {
 					get();
 				} else {
@@ -350,6 +387,7 @@
 				get();
 				readSpacesAndTabs();
 				event.comment = readUntil('\n', &found);
+				event.comment = unescape(event.comment, UnescapingProcessor::TextType::Comment);
 				for (INIContentHandler* handler : handlers) handler->comment(event);
 			} else {
 				INIContentHandler::EntryEvent event;
@@ -380,9 +418,14 @@
 						event.key = match[1];
 						event.subKey = match[2];
 						event.fullKey = fullKey;
+						event.subKey = unescape(event.subKey, UnescapingProcessor::TextType::EntryKey);
 					}
 				}
 
+				event.key = unescape(event.key, UnescapingProcessor::TextType::EntryKey);
+				event.fullKey = unescape(event.fullKey, UnescapingProcessor::TextType::EntryKey);
+				event.value = unescape(event.value, UnescapingProcessor::TextType::EntryValue);
+
 				if (quote) {
 					readSpacesAndTabs();
 					ch = peek();
@@ -390,6 +433,7 @@
 						get();
 						readSpacesAndTabs();
 						event.comment = readUntil('\n', &found);
+						event.comment = unescape(event.comment, UnescapingProcessor::TextType::EntryComment);
 					} else if (ch == '\n') {
 						get();
 					} else {
--- a/src/lib/INIReader.h	Sat Nov 28 00:46:40 2020 +0100
+++ b/src/lib/INIReader.h	Sat Nov 28 18:10:47 2020 +0100
@@ -20,6 +20,7 @@
 #include <istream>
 
 #include "INIContentHandler.h"
+#include "UnescapingProcessor.h"
 
 namespace relpipe {
 namespace in {
@@ -45,6 +46,7 @@
 	 *        - but both sides will know the schema (allowed elements and attributes for INI events)
 	 */
 	virtual void addHandler(INIContentHandler* handler) = 0;
+	virtual void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) = 0;
 	virtual void process() = 0;
 	static INIReader* create(std::istream& input);
 };
--- a/src/lib/JavaPropertiesUnescapingINIHandler.h	Sat Nov 28 00:46:40 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-/**
- * Relational pipes
- * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#include <sstream>
-#include <codecvt>
-#include <arpa/inet.h>
-
-#include "UnescapingINIHandler.h"
-
-using namespace std;
-using namespace relpipe::writer;
-
-namespace relpipe {
-namespace in {
-namespace ini {
-namespace lib {
-
-class JavaPropertiesUnescapingINIContentHandler : public UnescapingINIContentHandler {
-private:
-	wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
-
-	bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) {
-		if (hexLength != binLength * 2) return false;
-
-		for (size_t i = 0; i < binLength; i++) {
-			uint8_t value = 0;
-			char a = hexadecimal[i * 2];
-			char b = hexadecimal[i * 2 + 1];
-
-			if (a >= '0' && a <= '9') value += (a - '0')*16;
-			else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16;
-			else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16;
-			else return false;
-
-			if (b >= '0' && b <= '9') value += b - '0';
-			else if (b >= 'a' && b <= 'f') value += b - 'a' + 10;
-			else if (b >= 'A' && b <= 'F') value += b - 'A' + 10;
-			else return false;
-
-			if (resultBuffer) resultBuffer[i] = value;
-		}
-		return true;
-	}
-
-protected:
-
-	virtual std::string unescape(const std::string& s) {
-		std::stringstream result;
-		for (int i = 0, length = s.size(); i < length; i++) {
-			char ch = s[i];
-			if (i + 1 < length && ch == ESC) {
-				ch = s[i + 1];
-				if (ch == 'u') {
-					// TODO: simplify, clean-up, verify (but seems working)
-					i++;
-					int hexLength = 4;
-					if (i + hexLength < length) {
-						uint16_t u16;
-						bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16));
-						if (hexOK) result << convertor.to_bytes(ntohs(u16));
-						else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
-						i += hexLength;
-					} else {
-						throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
-					}
-
-				} else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
-				else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \.
-				else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
-				else result.put(ESC); // keep the escape sequence for later unescaping phase
-			} else if (ch == ESC) {
-				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
-			} else {
-				result.put(ch);
-			}
-		}
-		return result.str();
-	}
-
-public:
-
-	JavaPropertiesUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase, true) {
-	}
-
-};
-
-}
-}
-}
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaPropertiesUnescapingProcessor.h	Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,101 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+#include <codecvt>
+#include <arpa/inet.h>
+
+#include "UnescapingProcessor.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/**
+ * Decodes Java-style Unicode escapes (backslash, one 'u', exactly four hex digits) into UTF-8. Intended to follow <https://docs.oracle.com/javase/specs/jls/se15/html/jls-3.html#jls-3.3> 3.3. Unicode Escapes
+ */
+class JavaPropertiesUnescapingProcessor : public UnescapingProcessor {
+private:
+	wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
+
+	bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) { // decodes hexLength hex digits into binLength bytes; returns false on a length mismatch or a non-hex digit
+		if (hexLength != binLength * 2) return false; // two hex digits per output byte
+
+		for (size_t i = 0; i < binLength; i++) {
+			uint8_t value = 0;
+			char a = hexadecimal[i * 2]; // high nibble
+			char b = hexadecimal[i * 2 + 1]; // low nibble
+
+			if (a >= '0' && a <= '9') value += (a - '0')*16;
+			else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16;
+			else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16;
+			else return false;
+
+			if (b >= '0' && b <= '9') value += b - '0';
+			else if (b >= 'a' && b <= 'f') value += b - 'a' + 10;
+			else if (b >= 'A' && b <= 'F') value += b - 'A' + 10;
+			else return false;
+
+			if (resultBuffer) resultBuffer[i] = value; // a null buffer means: validate only
+		}
+		return true;
+	}
+
+public:
+	
+	std::string unescape(const std::string& s, const TextType type) override { // "type" is not consulted here; all other escape sequences are passed through untouched
+		std::stringstream result;
+		for (int i = 0, length = s.size(); i < length; i++) {
+			char ch = s[i];
+			if (i + 1 < length && ch == ESC) { // ESC followed by at least one more character
+				ch = s[i + 1];
+				if (ch == 'u') {
+					// TODO: simplify, clean-up, verify (but seems working). NOTE(review): each escape is converted on its own; surrogate pairs are presumably not combined - confirm
+					i++; // move to the 'u'
+					int hexLength = 4;
+					if (i + hexLength < length) { // all four hex digits must be present
+						uint16_t u16;
+						bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16)); // bytes are stored in the order read, i.e. most significant first
+						if (hexOK) result << convertor.to_bytes(ntohs(u16)); // ntohs: network (big-endian) order to host order, then encode as UTF-8
+						else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
+						i += hexLength; // skip the consumed hex digits
+					} else {
+						throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
+					}
+
+				} else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
+				else result.put(ESC); // keep the escape sequence for later unescaping phase
+			} else if (ch == ESC) { // ESC is the very last character of the text
+				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
+			} else {
+				result.put(ch);
+			}
+		}
+		return result.str();
+	}
+
+};
+
+}
+}
+}
+}
--- a/src/lib/UnescapingINIHandler.h	Sat Nov 28 00:46:40 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-/**
- * Relational pipes
- * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#include <sstream>
-
-#include "INIReader.h"
-
-using namespace std;
-using namespace relpipe::writer;
-
-namespace relpipe {
-namespace in {
-namespace ini {
-namespace lib {
-
-class UnescapingINIContentHandler : public INIContentHandler {
-private:
-	INIContentHandler& output;
-	bool unescapeComments;
-
-protected:
-	const char ESC = '\\';
-	bool lastEscaphingPhase;
-
-	std::stringstream& put(std::stringstream& result, const char& ch, int& i) {
-		result.put(ch);
-		i++;
-		return result;
-	}
-
-	virtual std::string unescape(const std::string& s) = 0;
-
-public:
-
-	/**
-	 * @param output here will be sent events with unescaped values
-	 * @param lastEscaphingPhase instances of UnescapingINIContentHandler might be chained:
-	 * unsupported escaping sequences are kept untouched to be processed in further phases;
-	 * in the last phase, all remaining sequences (including \\) must be recognized and unescaped
-	 * (otherwise the input is considered invalid and an exception is thrown)
-	 */
-	UnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase, bool unescapeComments = false) : output(output), lastEscaphingPhase(lastEscaphingPhase), unescapeComments(unescapeComments) {
-	}
-
-	void startDocument() override {
-		output.startDocument();
-	}
-
-	void endDocument() override {
-		output.endDocument();
-	}
-
-	void startSection(const SectionStartEvent& event) override {
-		SectionStartEvent e = event;
-		e.name = unescape(e.name);
-		if (unescapeComments) e.comment = unescape(e.comment);
-		output.startSection(e);
-	}
-
-	void endSection() override {
-		output.endSection();
-	}
-
-	void entry(const EntryEvent& event) override {
-		EntryEvent e = event;
-		e.key = unescape(e.key);
-		e.fullKey = unescape(e.fullKey);
-		e.subKey = unescape(e.subKey);
-		e.value = unescape(e.value);
-		if (unescapeComments) e.comment = unescape(e.comment);
-		output.entry(e);
-	}
-
-	void comment(const CommentEvent& event) override {
-		if (unescapeComments) {
-			CommentEvent e = event;
-			e.comment = unescape(e.comment);
-			output.comment(e);
-		} else {
-			output.comment(event);
-		}
-	}
-
-	void whitespace(const WhitespaceEvent& event) override {
-		output.whitespace(event);
-	}
-
-};
-
-}
-}
-}
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/UnescapingProcessor.h	Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,60 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include "INIReader.h"
+
+using namespace std;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+class UnescapingProcessor { // common base of the unescaping stages; each stage rewrites one piece of text
+protected:
+	const char ESC = '\\'; // the escape character (backslash)
+
+	std::stringstream& put(std::stringstream& result, const char& ch, int& i) { // writes ch and advances the caller's index i past the escaped character
+		result.put(ch);
+		i++;
+		return result;
+	}
+
+public:
+	enum class TextType { // tells a stage which part of the INI document the text comes from
+		SectionName,
+		SectionComment,
+		SectionTag,
+		EntryKey,
+		EntryValue,
+		EntryComment,
+		Comment,
+	};
+
+	virtual ~UnescapingProcessor() = default; // polymorphic base: make destruction through a base pointer well-defined
+
+	virtual std::string unescape(const std::string& s, const TextType type) = 0; // returns the unescaped form of s; implementations in this changeset throw std::logic_error on invalid input
+
+};
+
+}
+}
+}
+}