--- a/nbproject/configurations.xml Sat Nov 28 00:46:40 2020 +0100
+++ b/nbproject/configurations.xml Sat Nov 28 18:10:47 2020 +0100
@@ -46,7 +46,7 @@
<in>INIContentHandler.h</in>
<in>INIReader.cpp</in>
<in>INIReader.h</in>
- <in>UnescapingINIHandler.h</in>
+ <in>UnescapingProcessor.h</in>
</df>
<in>INICommand.cpp</in>
<in>relpipe-in-ini.cpp</in>
@@ -147,7 +147,7 @@
</item>
<item path="src/lib/INIReader.h" ex="false" tool="3" flavor2="0">
</item>
- <item path="src/lib/UnescapingINIHandler.h" ex="false" tool="3" flavor2="0">
+ <item path="src/lib/UnescapingProcessor.h" ex="false" tool="3" flavor2="0">
</item>
</conf>
</confs>
--- a/src/INICommand.cpp Sat Nov 28 00:46:40 2020 +0100
+++ b/src/INICommand.cpp Sat Nov 28 18:10:47 2020 +0100
@@ -28,8 +28,9 @@
#include "INICommand.h"
#include "lib/INIReader.h"
-#include "lib/BasicUnescapingINIHandler.h"
-#include "lib/JavaPropertiesUnescapingINIHandler.h"
+#include "lib/BasicUnescapingProcessor.h"
+#include "lib/BackspaceUnescapingProcessor.h"
+#include "lib/JavaPropertiesUnescapingProcessor.h"
using namespace std;
using namespace relpipe::writer;
@@ -170,10 +171,13 @@
void INICommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
FlatINIContentHandler handler(writer, configuration);
std::shared_ptr<INIReader> reader(INIReader::create(input));
+ reader->addUnescapingProcessor(std::make_shared<BasicUnescapingProcessor>(), "unescape-basic", true); // arguments: the stage, the parser option that switches it, enabledByDefault; stages are registered before the options are applied below
+ reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), "unescape-java-properties", false); // \uXXXX decoding is off unless the option turns it on
+ reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(false), "unescape-backspace-disorder", false); // constructed with lastEscaphingPhase = false: unknown escape sequences are passed through instead of rejected
+ reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), "unescape-backspace", true); // registered last with the default lastEscaphingPhase = true: any escape sequence other than \\ is an error here
+ reader->addHandler(&handler);
+ // TODO: smart pointers vs. references: are we going to call addUnescapingProcessor() dynamically/conditionally or share instances? Then pointers will be better.
for (ParserOptionRecipe option : configuration.parserOptions) reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value));
- BasicUnescapingINIContentHandler unescapingHandler(handler, false);
- JavaPropertiesUnescapingINIContentHandler javaHandler(unescapingHandler, true);
- reader->addHandler(&javaHandler);
reader->process();
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/BackspaceUnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,70 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include "UnescapingProcessor.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/** Unescaping stage for the backslash itself: turns "\\" into "\"; any other escape sequence is either rejected or passed through (see the constructor). */
+class BackspaceUnescapingProcessor : public UnescapingProcessor {
+private:
+ const bool lastEscaphingPhase = true;
+public:
+ /**
+  * @param s text that may contain backslash escape sequences
+  * @param type kind of the text being processed (not consulted by this processor)
+  * @return s with each "\\" replaced by a single "\"
+  * @throws std::logic_error if s ends with a lone backslash, or (only in the last phase) if it contains any escape sequence other than "\\"
+  */
+ std::string unescape(const std::string& s, const TextType type) override {
+     std::stringstream result;
+     for (int i = 0, length = s.size(); i < length; i++) {
+         char ch = s[i];
+         if (i + 1 < length && ch == ESC) {
+             ch = s[i + 1];
+             if (ch == ESC) put(result, ESC, i); // unescape \\ to \ (put() also advances i past the second backslash)
+             else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
+             else result.put(ESC); // keep the escape sequence for later unescaping phase
+         } else if (ch == ESC) throw std::logic_error(std::string("Missing escape sequence")); // lone backslash at the end; this should not happen
+         else result.put(ch);
+     }
+     return result.str();
+ }
+
+ /**
+  * @param lastEscaphingPhase whether this is the final unescaping stage. By default it is true, thus no unrecognized escape sequence may be left after this stage.
+  * Setting this to false is dangerous and may lead to errors and ambiguous behavior; it should be used only as a last resort,
+  * because both "\\ \xxx" and "\ \xxx" will be converted to "\ \xxx" and the information will be lost.
+  * So it is usually better to keep the "\" escaped as "\\" and process both the escaped backslashes and the unrecognized escape sequences later.
+  */
+ explicit BackspaceUnescapingProcessor(bool lastEscaphingPhase = true) : lastEscaphingPhase(lastEscaphingPhase) {
+ }
+};
+
+}
+}
+}
+}
--- a/src/lib/BasicUnescapingINIHandler.h Sat Nov 28 00:46:40 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-/**
- * Relational pipes
- * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#include <sstream>
-
-#include "UnescapingINIHandler.h"
-
-using namespace std;
-using namespace relpipe::writer;
-
-namespace relpipe {
-namespace in {
-namespace ini {
-namespace lib {
-
-class BasicUnescapingINIContentHandler : public UnescapingINIContentHandler {
-protected:
-
- virtual std::string unescape(const std::string& s) {
- std::stringstream result;
- for (int i = 0, length = s.size(); i < length; i++) {
- char ch = s[i];
- if (i + 1 < length && ch == ESC) {
- ch = s[i + 1];
- if (ch == 'n') put(result, '\n', i);
- else if (ch == 'r') put(result, '\r', i);
- else if (ch == 't') put(result, '\t', i);
- else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what is „basic“ escaping and should be supported.
- else if (ch == '"') put(result, ch, i); // The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to given environment is unescaped, e.g. \" in "quoted" value).
- else if (ch == '\'') put(result, ch, i); // So it does not necessary to do it here. But someone might write a="xxx\'zzz" however it is superfluous because a="xxx'zzz" will also work.
- else if (ch == ']') put(result, ch, i);
- else if (ch == ':') put(result, ch, i);
- else if (ch == ';') put(result, ch, i);
- else if (ch == '#') put(result, ch, i);
- else if (ch == '=') put(result, ch, i);
- else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
- else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \.
- else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
- else result.put(ESC); // keep the escape sequence for later unescaping phase
- } else if (ch == ESC) {
- throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
- } else {
- result.put(ch);
- }
- }
- return result.str();
- }
-
-public:
-
- BasicUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase) {
- }
-
-};
-
-}
-}
-}
-}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/BasicUnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,67 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include "UnescapingProcessor.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/** Unescaping stage for the basic sequences \n \r \t \s and for the escaped characters " ' ] : ; # = */
+class BasicUnescapingProcessor : public UnescapingProcessor {
+public:
+ /**
+  * Replaces the recognized escape sequences in s; "\\" and unrecognized sequences are copied unchanged so that a later phase can handle them.
+  * The text type is not consulted. Throws std::logic_error if s ends with a lone backslash.
+  */
+ std::string unescape(const std::string& s, const TextType type) override {
+     std::stringstream result;
+     for (int i = 0, length = s.size(); i < length; i++) {
+         const char current = s[i];
+         if (current != ESC) {
+             result.put(current);
+             continue;
+         }
+         if (i + 1 >= length) throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
+         const char next = s[i + 1];
+         switch (next) {
+             case 'n': put(result, '\n', i); break;
+             case 'r': put(result, '\r', i); break;
+             case 't': put(result, '\t', i); break;
+             case 's': put(result, ' ', i); break; // TODO: Reconsider what is "basic" escaping and should be supported.
+             // The delimiters are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to given environment, e.g. \" in "quoted" value),
+             // so it is not necessary to do it here. But someone might write a="xxx\'zzz" even though it is superfluous because a="xxx'zzz" will also work.
+             case '"': case '\'': case ']': case ':': case ';': case '#': case '=': put(result, next, i); break;
+             case '\\': put(result, ESC, i).put(ESC); break; // copy and skip even the second \ to avoid its misinterpretation in the next cycle
+             default: result.put(ESC); // keep the escape sequence for later unescaping phase
+         }
+     }
+     return result.str();
+ }
+};
+
+}
+}
+}
+}
--- a/src/lib/INIReader.cpp Sat Nov 28 00:46:40 2020 +0100
+++ b/src/lib/INIReader.cpp Sat Nov 28 18:10:47 2020 +0100
@@ -32,6 +32,19 @@
std::istream& input;
std::vector<INIContentHandler*> handlers;
+ /** One registered unescaping stage: the processor, the parser-option URI that switches it, and its current on/off state. */
+ class ConfiguredUnescapingProcessor {
+ public:
+     std::shared_ptr<UnescapingProcessor> processor;
+     const std::string uri;
+     bool enbaled; // (sic) written by setUnescaping(), read by unescape()
+     ConfiguredUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enbaled)
+     : processor(processor), uri(uri), enbaled(enbaled) {
+     }
+ };
+
+ std::vector<ConfiguredUnescapingProcessor> unescapingProcessors;
+
/**
* By default, we ignore all leading whitespace on continuing lines.
* If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
@@ -221,6 +234,12 @@
return result;
}
+ std::string unescape(const std::string& value, UnescapingProcessor::TextType type) { // runs the value through every enabled processor, in registration order; type is handed to each processor as is
+     std::string result = value;
+     for (const ConfiguredUnescapingProcessor& p : unescapingProcessors) if (p.enbaled) result = p.processor->unescape(result, type); // bound by const reference: no shared_ptr or std::string copy per element
+     return result;
+ }
+
bool isComment(char ch) {
return oneOf(ch, commentSeparators);
}
@@ -268,6 +287,16 @@
}
}
+ /** Switches the processor registered under the given URI on or off (the value goes through parseBoolean()); returns false if no processor has that URI. */
+ bool setUnescaping(const std::string& uri, const std::string& value) {
+     for (ConfiguredUnescapingProcessor& candidate : unescapingProcessors) {
+         if (candidate.uri != uri) continue;
+         candidate.enbaled = parseBoolean(value);
+         return true;
+     }
+     return false;
+ }
+
public:
INIReaderImpl(std::istream& input) : input(input) {
@@ -282,6 +311,7 @@
else if (uri == "key-value-separators") keyValueSeparators = value;
else if (uri == "quotes") quotes = value;
else if (uri == "dialect") setDialect(value);
+ else if (setUnescaping(uri, value));
else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
}
@@ -289,6 +319,10 @@
handlers.push_back(handler);
}
+ void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) override { // appends the stage; stages are applied in the order they were added
+     unescapingProcessors.emplace_back(processor, uri, enabledByDefault);
+ }
+
void process() override {
for (INIContentHandler* handler : handlers) handler->startDocument();
@@ -323,11 +357,13 @@
readAllWhitespace();
event.name = readTokenAndEatTerminator(']', "e, &found);
if (!quote) event.name = trim(event.name);
+ event.name = unescape(event.name, UnescapingProcessor::TextType::SectionName);
readSpacesAndTabs();
if (allowSectionTags && peek() == '[') {
get();
event.tag = readTokenAndEatTerminator(']', "e, &found);
+ event.tag = unescape(event.tag, UnescapingProcessor::TextType::SectionTag);
}
readSpacesAndTabs();
@@ -336,6 +372,7 @@
get();
readSpacesAndTabs();
event.comment = readUntil('\n', &found);
+ event.comment = unescape(event.comment, UnescapingProcessor::TextType::SectionComment);
} else if (ch == '\n') {
get();
} else {
@@ -350,6 +387,7 @@
get();
readSpacesAndTabs();
event.comment = readUntil('\n', &found);
+ event.comment = unescape(event.comment, UnescapingProcessor::TextType::Comment);
for (INIContentHandler* handler : handlers) handler->comment(event);
} else {
INIContentHandler::EntryEvent event;
@@ -380,9 +418,14 @@
event.key = match[1];
event.subKey = match[2];
event.fullKey = fullKey;
+ event.subKey = unescape(event.subKey, UnescapingProcessor::TextType::EntryKey);
}
}
+ event.key = unescape(event.key, UnescapingProcessor::TextType::EntryKey);
+ event.fullKey = unescape(event.fullKey, UnescapingProcessor::TextType::EntryKey);
+ event.value = unescape(event.value, UnescapingProcessor::TextType::EntryValue);
+
if (quote) {
readSpacesAndTabs();
ch = peek();
@@ -390,6 +433,7 @@
get();
readSpacesAndTabs();
event.comment = readUntil('\n', &found);
+ event.comment = unescape(event.comment, UnescapingProcessor::TextType::EntryComment);
} else if (ch == '\n') {
get();
} else {
--- a/src/lib/INIReader.h Sat Nov 28 00:46:40 2020 +0100
+++ b/src/lib/INIReader.h Sat Nov 28 18:10:47 2020 +0100
@@ -20,6 +20,7 @@
#include <istream>
#include "INIContentHandler.h"
+#include "UnescapingProcessor.h"
namespace relpipe {
namespace in {
@@ -45,6 +46,7 @@
* - but both sides will know the schema (allowed elements and attributes for INI events)
*/
virtual void addHandler(INIContentHandler* handler) = 0;
+ virtual void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) = 0; // registers an unescaping stage; uri = name of the parser option that switches it on/off, enabledByDefault = its initial state
virtual void process() = 0;
static INIReader* create(std::istream& input);
};
--- a/src/lib/JavaPropertiesUnescapingINIHandler.h Sat Nov 28 00:46:40 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-/**
- * Relational pipes
- * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#include <sstream>
-#include <codecvt>
-#include <arpa/inet.h>
-
-#include "UnescapingINIHandler.h"
-
-using namespace std;
-using namespace relpipe::writer;
-
-namespace relpipe {
-namespace in {
-namespace ini {
-namespace lib {
-
-class JavaPropertiesUnescapingINIContentHandler : public UnescapingINIContentHandler {
-private:
- wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
-
- bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) {
- if (hexLength != binLength * 2) return false;
-
- for (size_t i = 0; i < binLength; i++) {
- uint8_t value = 0;
- char a = hexadecimal[i * 2];
- char b = hexadecimal[i * 2 + 1];
-
- if (a >= '0' && a <= '9') value += (a - '0')*16;
- else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16;
- else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16;
- else return false;
-
- if (b >= '0' && b <= '9') value += b - '0';
- else if (b >= 'a' && b <= 'f') value += b - 'a' + 10;
- else if (b >= 'A' && b <= 'F') value += b - 'A' + 10;
- else return false;
-
- if (resultBuffer) resultBuffer[i] = value;
- }
- return true;
- }
-
-protected:
-
- virtual std::string unescape(const std::string& s) {
- std::stringstream result;
- for (int i = 0, length = s.size(); i < length; i++) {
- char ch = s[i];
- if (i + 1 < length && ch == ESC) {
- ch = s[i + 1];
- if (ch == 'u') {
- // TODO: simplify, clean-up, verify (but seems working)
- i++;
- int hexLength = 4;
- if (i + hexLength < length) {
- uint16_t u16;
- bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16));
- if (hexOK) result << convertor.to_bytes(ntohs(u16));
- else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
- i += hexLength;
- } else {
- throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
- }
-
- } else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
- else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \.
- else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch);
- else result.put(ESC); // keep the escape sequence for later unescaping phase
- } else if (ch == ESC) {
- throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
- } else {
- result.put(ch);
- }
- }
- return result.str();
- }
-
-public:
-
- JavaPropertiesUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase, true) {
- }
-
-};
-
-}
-}
-}
-}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaPropertiesUnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,101 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+#include <codecvt>
+#include <arpa/inet.h>
+
+#include "UnescapingProcessor.h"
+
+using namespace std;
+using namespace relpipe::writer;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/**
+ * Decodes the \uXXXX escapes known from Java .properties files and emits the decoded characters as UTF-8.
+ * Should work according to <https://docs.oracle.com/javase/specs/jls/se15/html/jls-3.html#jls-3.3> 3.3. Unicode Escapes
+ * NOTE(review): each \uXXXX is converted on its own, so surrogate pairs and the repeated-u form from the specification are presumably not covered; confirm.
+ */
+class JavaPropertiesUnescapingProcessor : public UnescapingProcessor {
+private:
+ wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
+
+ /** @return value (0 to 15) of one hexadecimal digit, or -1 if the character is not a hexadecimal digit */
+ int hexDigit(char digit) {
+     if (digit >= '0' && digit <= '9') return digit - '0';
+     if (digit >= 'a' && digit <= 'f') return digit - 'a' + 10;
+     if (digit >= 'A' && digit <= 'F') return digit - 'A' + 10;
+     return -1;
+ }
+
+ /**
+  * @param hexadecimal start of four readable characters
+  * @param codeUnit receives the value of the four digits, the first digit being the most significant one
+  * @return false if any of the four characters is not a hexadecimal digit
+  */
+ bool readCodeUnit(const char* hexadecimal, uint16_t& codeUnit) {
+     codeUnit = 0;
+     for (int position = 0; position < 4; position++) {
+         const int digit = hexDigit(hexadecimal[position]);
+         if (digit < 0) return false;
+         codeUnit = codeUnit * 16 + digit;
+     }
+     return true;
+ }
+
+public:
+
+ /**
+  * @param s text that may contain \uXXXX escapes
+  * @param type kind of the text (not consulted)
+  * @return s with every \uXXXX replaced by the UTF-8 form of that character; "\\" and other escape sequences are copied unchanged
+  * @throws std::logic_error for an incomplete or non-hexadecimal \u sequence, or for a lone backslash at the end of s
+  */
+ std::string unescape(const std::string& s, const TextType type) override {
+     std::stringstream result;
+     const int hexLength = 4;
+     for (int i = 0, length = s.size(); i < length; i++) {
+         if (s[i] != ESC) {
+             result.put(s[i]);
+         } else if (i + 1 >= length) {
+             throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
+         } else if (s[i + 1] == 'u') {
+             i++; // i is now at the 'u'; the digits occupy i + 1 to i + hexLength
+             uint16_t codeUnit;
+             if (i + hexLength >= length) throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));
+             if (!readCodeUnit(s.c_str() + i + 1, codeUnit)) throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));
+             result << convertor.to_bytes(codeUnit);
+             i += hexLength;
+         } else if (s[i + 1] == ESC) {
+             put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
+         } else {
+             result.put(ESC); // keep the escape sequence for later unescaping phase
+         }
+     }
+     return result.str();
+ }
+};
+
+}
+}
+}
+}
--- a/src/lib/UnescapingINIHandler.h Sat Nov 28 00:46:40 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-/**
- * Relational pipes
- * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#include <sstream>
-
-#include "INIReader.h"
-
-using namespace std;
-using namespace relpipe::writer;
-
-namespace relpipe {
-namespace in {
-namespace ini {
-namespace lib {
-
-class UnescapingINIContentHandler : public INIContentHandler {
-private:
- INIContentHandler& output;
- bool unescapeComments;
-
-protected:
- const char ESC = '\\';
- bool lastEscaphingPhase;
-
- std::stringstream& put(std::stringstream& result, const char& ch, int& i) {
- result.put(ch);
- i++;
- return result;
- }
-
- virtual std::string unescape(const std::string& s) = 0;
-
-public:
-
- /**
- * @param output here will be sent events with unescaped values
- * @param lastEscaphingPhase instances of UnescapingINIContentHandler might be chained:
- * unsupported escaping sequences are kept untouched to be processed in further phases;
- * in the last phase, all remaining sequences (including \\) must be recognized and unescaped
- * (otherwise the input is considered invalid and an exception is thrown)
- */
- UnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase, bool unescapeComments = false) : output(output), lastEscaphingPhase(lastEscaphingPhase), unescapeComments(unescapeComments) {
- }
-
- void startDocument() override {
- output.startDocument();
- }
-
- void endDocument() override {
- output.endDocument();
- }
-
- void startSection(const SectionStartEvent& event) override {
- SectionStartEvent e = event;
- e.name = unescape(e.name);
- if (unescapeComments) e.comment = unescape(e.comment);
- output.startSection(e);
- }
-
- void endSection() override {
- output.endSection();
- }
-
- void entry(const EntryEvent& event) override {
- EntryEvent e = event;
- e.key = unescape(e.key);
- e.fullKey = unescape(e.fullKey);
- e.subKey = unescape(e.subKey);
- e.value = unescape(e.value);
- if (unescapeComments) e.comment = unescape(e.comment);
- output.entry(e);
- }
-
- void comment(const CommentEvent& event) override {
- if (unescapeComments) {
- CommentEvent e = event;
- e.comment = unescape(e.comment);
- output.comment(e);
- } else {
- output.comment(event);
- }
- }
-
- void whitespace(const WhitespaceEvent& event) override {
- output.whitespace(event);
- }
-
-};
-
-}
-}
-}
-}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/UnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100
@@ -0,0 +1,60 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include "INIReader.h"
+
+using namespace std;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/** Base class of the unescaping stages that INIReader applies to the names, tags, keys, values and comments it has parsed. */
+class UnescapingProcessor {
+protected:
+ const char ESC = '\\'; // the character that introduces an escape sequence
+ /** Appends ch to result and increments i, so that the caller's loop skips the character following the one at i; returns result to allow chaining. */
+ std::stringstream& put(std::stringstream& result, const char& ch, int& i) {
+     result.put(ch);
+     i++;
+     return result;
+ }
+
+public:
+ /** Tells the processor what kind of text it is given. */
+ enum class TextType {
+     SectionName,
+     SectionComment,
+     SectionTag,
+     EntryKey,
+     EntryValue,
+     EntryComment,
+     Comment,
+ };
+ virtual ~UnescapingProcessor() = default; // polymorphic base: keeps destruction through a base-class pointer well-defined
+ /** @param s text to be processed @param type kind of that text @return the text with the escape sequences known to this processor replaced */
+ virtual std::string unescape(const std::string& s, const TextType type) = 0;
+};
+
+}
+}
+}
+}