# HG changeset patch # User František Kučera # Date 1606583447 -3600 # Node ID e9aad9dd823a2d431c3d855a57582d18a2118dad # Parent b1f6fa3a655562df0347f8ffd43c36b0304fd4f2 configurable unescaping processors diff -r b1f6fa3a6555 -r e9aad9dd823a nbproject/configurations.xml --- a/nbproject/configurations.xml Sat Nov 28 00:46:40 2020 +0100 +++ b/nbproject/configurations.xml Sat Nov 28 18:10:47 2020 +0100 @@ -46,7 +46,7 @@ INIContentHandler.h INIReader.cpp INIReader.h - UnescapingINIHandler.h + UnescapingProcessor.h INICommand.cpp relpipe-in-ini.cpp @@ -147,7 +147,7 @@ - + diff -r b1f6fa3a6555 -r e9aad9dd823a src/INICommand.cpp --- a/src/INICommand.cpp Sat Nov 28 00:46:40 2020 +0100 +++ b/src/INICommand.cpp Sat Nov 28 18:10:47 2020 +0100 @@ -28,8 +28,9 @@ #include "INICommand.h" #include "lib/INIReader.h" -#include "lib/BasicUnescapingINIHandler.h" -#include "lib/JavaPropertiesUnescapingINIHandler.h" +#include "lib/BasicUnescapingProcessor.h" +#include "lib/BackspaceUnescapingProcessor.h" +#include "lib/JavaPropertiesUnescapingProcessor.h" using namespace std; using namespace relpipe::writer; @@ -170,10 +171,13 @@ void INICommand::process(std::istream& input, std::shared_ptr writer, Configuration& configuration) { FlatINIContentHandler handler(writer, configuration); std::shared_ptr reader(INIReader::create(input)); + reader->addUnescapingProcessor(std::make_shared(), "unescape-basic", true); + reader->addUnescapingProcessor(std::make_shared(), "unescape-java-properties", false); + reader->addUnescapingProcessor(std::make_shared(false), "unescape-backspace-disorder", false); + reader->addUnescapingProcessor(std::make_shared(), "unescape-backspace", true); + reader->addHandler(&handler); + // TODO: smart pointers vs. references: are we going to call addUnescapingProcessor() dynamically/conditionally or share instances? Then pointers will be better. 
for (ParserOptionRecipe option : configuration.parserOptions) reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value)); - BasicUnescapingINIContentHandler unescapingHandler(handler, false); - JavaPropertiesUnescapingINIContentHandler javaHandler(unescapingHandler, true); - reader->addHandler(&javaHandler); reader->process(); } diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/BackspaceUnescapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/BackspaceUnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100 @@ -0,0 +1,70 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include + +#include "UnescapingProcessor.h" + +using namespace std; +using namespace relpipe::writer; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +class BackspaceUnescapingProcessor : public UnescapingProcessor { +private: + const bool lastEscaphingPhase = true; +public: + + std::string unescape(const std::string& s, const TextType type) override { + std::stringstream result; + for (int i = 0, length = s.size(); i < length; i++) { + char ch = s[i]; + if (i + 1 < length && ch == ESC) { + ch = s[i + 1]; + if (ch == ESC) put(result, ESC, i); // unescape \\ to \. 
+ else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch); + else result.put(ESC); // keep the escape sequence for later unescaping phase + } else if (ch == ESC) { + throw std::logic_error(std::string("Missing escape sequence")); // this should not happen + } else { + result.put(ch); + } + } + return result.str(); + } + + /** + * @param lastEscaphingPhase whether this is the final unescaping stage. + * By default it is set to true, thus no unrecognized escape sequences may be left after this stage. + * Setting this to false is dangerous and may lead to errors and ambiguous behavior. + * It should be used only as a last resort. + * Because both "\\ \xxx" and "\ \xxx" will be converted to "\ \xxx" and the information will be lost. + * So, it is usually better to keep the "\" escaped as "\\" and process both the escaped backslashes and unrecognized escape sequences later. + */ + BackspaceUnescapingProcessor(bool lastEscaphingPhase = true) : lastEscaphingPhase(lastEscaphingPhase) { + } + +}; + +} +} +} +} diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/BasicUnescapingINIHandler.h --- a/src/lib/BasicUnescapingINIHandler.h Sat Nov 28 00:46:40 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ -/** - * Relational pipes - * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -#pragma once - -#include - -#include "UnescapingINIHandler.h" - -using namespace std; -using namespace relpipe::writer; - -namespace relpipe { -namespace in { -namespace ini { -namespace lib { - -class BasicUnescapingINIContentHandler : public UnescapingINIContentHandler { -protected: - - virtual std::string unescape(const std::string& s) { - std::stringstream result; - for (int i = 0, length = s.size(); i < length; i++) { - char ch = s[i]; - if (i + 1 < length && ch == ESC) { - ch = s[i + 1]; - if (ch == 'n') put(result, '\n', i); - else if (ch == 'r') put(result, '\r', i); - else if (ch == 't') put(result, '\t', i); - else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what is „basic“ escaping and should be supported. - else if (ch == '"') put(result, ch, i); // The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to given environment is unescaped, e.g. \" in "quoted" value). - else if (ch == '\'') put(result, ch, i); // So it does not necessary to do it here. But someone might write a="xxx\'zzz" however it is superfluous because a="xxx'zzz" will also work. - else if (ch == ']') put(result, ch, i); - else if (ch == ':') put(result, ch, i); - else if (ch == ';') put(result, ch, i); - else if (ch == '#') put(result, ch, i); - else if (ch == '=') put(result, ch, i); - else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle - else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \. 
- else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch); - else result.put(ESC); // keep the escape sequence for later unescaping phase - } else if (ch == ESC) { - throw std::logic_error(std::string("Missing escape sequence")); // this should not happen - } else { - result.put(ch); - } - } - return result.str(); - } - -public: - - BasicUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase) { - } - -}; - -} -} -} -} diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/BasicUnescapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/BasicUnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100 @@ -0,0 +1,67 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#pragma once + +#include + +#include "UnescapingProcessor.h" + +using namespace std; +using namespace relpipe::writer; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +class BasicUnescapingProcessor : public UnescapingProcessor { +public: + + std::string unescape(const std::string& s, const TextType type) override { + std::stringstream result; + for (int i = 0, length = s.size(); i < length; i++) { + char ch = s[i]; + if (i + 1 < length && ch == ESC) { + ch = s[i + 1]; + if (ch == 'n') put(result, '\n', i); + else if (ch == 'r') put(result, '\r', i); + else if (ch == 't') put(result, '\t', i); + else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what „basic“ escaping is and what should be supported. + else if (ch == '"') put(result, ch, i); // The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to the given environment is unescaped, e.g. \" in a "quoted" value). + else if (ch == '\'') put(result, ch, i); // So it is not necessary to do it here. But someone might write a="xxx\'zzz", although it is superfluous because a="xxx'zzz" will also work. 
+ else if (ch == ']') put(result, ch, i); + else if (ch == ':') put(result, ch, i); + else if (ch == ';') put(result, ch, i); + else if (ch == '#') put(result, ch, i); + else if (ch == '=') put(result, ch, i); + else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle + else result.put(ESC); // keep the escape sequence for later unescaping phase + } else if (ch == ESC) { + throw std::logic_error(std::string("Missing escape sequence")); // this should not happen + } else { + result.put(ch); + } + } + return result.str(); + } + +}; + +} +} +} +} diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/INIReader.cpp --- a/src/lib/INIReader.cpp Sat Nov 28 00:46:40 2020 +0100 +++ b/src/lib/INIReader.cpp Sat Nov 28 18:10:47 2020 +0100 @@ -32,6 +32,19 @@ std::istream& input; std::vector handlers; + class ConfiguredUnescapingProcessor { + public: + std::shared_ptr processor; + const std::string uri; + bool enbaled; + + ConfiguredUnescapingProcessor(std::shared_ptr processor, const std::string uri, bool enbaled) : processor(processor), uri(uri), enbaled(enbaled) { + } + + }; + + std::vector unescapingProcessors; + /** * By default, we ignore all leading whitespace on continuing lines. * If there should be some spaces or tabs, they should be placed on the previous line before the „\“. 
@@ -221,6 +234,12 @@ return result; } + std::string unescape(const std::string& value, UnescapingProcessor::TextType type) { + std::string result = value; + for (ConfiguredUnescapingProcessor p : unescapingProcessors) if (p.enbaled) result = p.processor->unescape(result, type); + return result; + } + bool isComment(char ch) { return oneOf(ch, commentSeparators); } @@ -268,6 +287,16 @@ } } + bool setUnescaping(const std::string& uri, const std::string& value) { + for (ConfiguredUnescapingProcessor& p : unescapingProcessors) { + if (p.uri == uri) { + p.enbaled = parseBoolean(value); + return true; + } + } + return false; + } + public: INIReaderImpl(std::istream& input) : input(input) { @@ -282,6 +311,7 @@ else if (uri == "key-value-separators") keyValueSeparators = value; else if (uri == "quotes") quotes = value; else if (uri == "dialect") setDialect(value); + else if (setUnescaping(uri, value)); else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“"); } @@ -289,6 +319,10 @@ handlers.push_back(handler); } + void addUnescapingProcessor(std::shared_ptr processor, const std::string uri, bool enabledByDefault) override { + unescapingProcessors.push_back({processor, uri, enabledByDefault}); + } + void process() override { for (INIContentHandler* handler : handlers) handler->startDocument(); @@ -323,11 +357,13 @@ readAllWhitespace(); event.name = readTokenAndEatTerminator(']', "e, &found); if (!quote) event.name = trim(event.name); + event.name = unescape(event.name, UnescapingProcessor::TextType::SectionName); readSpacesAndTabs(); if (allowSectionTags && peek() == '[') { get(); event.tag = readTokenAndEatTerminator(']', "e, &found); + event.tag = unescape(event.tag, UnescapingProcessor::TextType::SectionTag); } readSpacesAndTabs(); @@ -336,6 +372,7 @@ get(); readSpacesAndTabs(); event.comment = readUntil('\n', &found); + event.comment = unescape(event.comment, UnescapingProcessor::TextType::SectionComment); } else 
if (ch == '\n') { get(); } else { @@ -350,6 +387,7 @@ get(); readSpacesAndTabs(); event.comment = readUntil('\n', &found); + event.comment = unescape(event.comment, UnescapingProcessor::TextType::Comment); for (INIContentHandler* handler : handlers) handler->comment(event); } else { INIContentHandler::EntryEvent event; @@ -380,9 +418,14 @@ event.key = match[1]; event.subKey = match[2]; event.fullKey = fullKey; + event.subKey = unescape(event.subKey, UnescapingProcessor::TextType::EntryKey); } } + event.key = unescape(event.key, UnescapingProcessor::TextType::EntryKey); + event.fullKey = unescape(event.fullKey, UnescapingProcessor::TextType::EntryKey); + event.value = unescape(event.value, UnescapingProcessor::TextType::EntryValue); + if (quote) { readSpacesAndTabs(); ch = peek(); @@ -390,6 +433,7 @@ get(); readSpacesAndTabs(); event.comment = readUntil('\n', &found); + event.comment = unescape(event.comment, UnescapingProcessor::TextType::EntryComment); } else if (ch == '\n') { get(); } else { diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/INIReader.h --- a/src/lib/INIReader.h Sat Nov 28 00:46:40 2020 +0100 +++ b/src/lib/INIReader.h Sat Nov 28 18:10:47 2020 +0100 @@ -20,6 +20,7 @@ #include #include "INIContentHandler.h" +#include "UnescapingProcessor.h" namespace relpipe { namespace in { @@ -45,6 +46,7 @@ * - but both sides will know the schema (allowed elements and attributes for INI events) */ virtual void addHandler(INIContentHandler* handler) = 0; + virtual void addUnescapingProcessor(std::shared_ptr processor, const std::string uri, bool enabledByDefault) = 0; virtual void process() = 0; static INIReader* create(std::istream& input); }; diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/JavaPropertiesUnescapingINIHandler.h --- a/src/lib/JavaPropertiesUnescapingINIHandler.h Sat Nov 28 00:46:40 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -/** - * Relational pipes - * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) - * - * 
This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#pragma once - -#include -#include -#include - -#include "UnescapingINIHandler.h" - -using namespace std; -using namespace relpipe::writer; - -namespace relpipe { -namespace in { -namespace ini { -namespace lib { - -class JavaPropertiesUnescapingINIContentHandler : public UnescapingINIContentHandler { -private: - wstring_convert < codecvt_utf8> convertor; // INI parser works with UTF-8 - - bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) { - if (hexLength != binLength * 2) return false; - - for (size_t i = 0; i < binLength; i++) { - uint8_t value = 0; - char a = hexadecimal[i * 2]; - char b = hexadecimal[i * 2 + 1]; - - if (a >= '0' && a <= '9') value += (a - '0')*16; - else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16; - else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16; - else return false; - - if (b >= '0' && b <= '9') value += b - '0'; - else if (b >= 'a' && b <= 'f') value += b - 'a' + 10; - else if (b >= 'A' && b <= 'F') value += b - 'A' + 10; - else return false; - - if (resultBuffer) resultBuffer[i] = value; - } - return true; - } - -protected: - - virtual std::string unescape(const std::string& s) { - std::stringstream result; - for (int i = 0, length = s.size(); i < length; i++) { - char ch = s[i]; - if (i + 1 < length && ch == ESC) { - ch = s[i + 1]; - if (ch == 'u') { - // TODO: simplify, clean-up, verify (but seems 
working) - i++; - int hexLength = 4; - if (i + hexLength < length) { - uint16_t u16; - bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16)); - if (hexOK) result << convertor.to_bytes(ntohs(u16)); - else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX")); - i += hexLength; - } else { - throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters")); - } - - } else if (ch == ESC && !lastEscaphingPhase) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle - else if (ch == ESC && lastEscaphingPhase) put(result, ESC, i); // unescape \\ to \. - else if (lastEscaphingPhase) throw std::logic_error(std::string("Unsupported escape sequence: ") + ch); - else result.put(ESC); // keep the escape sequence for later unescaping phase - } else if (ch == ESC) { - throw std::logic_error(std::string("Missing escape sequence")); // this should not happen - } else { - result.put(ch); - } - } - return result.str(); - } - -public: - - JavaPropertiesUnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase) : UnescapingINIContentHandler(output, lastEscaphingPhase, true) { - } - -}; - -} -} -} -} diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/JavaPropertiesUnescapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/JavaPropertiesUnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100 @@ -0,0 +1,101 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include +#include + +#include "UnescapingProcessor.h" + +using namespace std; +using namespace relpipe::writer; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +/** + * Should work according to the Java Language Specification, section 3.3 (Unicode Escapes) + */ +class JavaPropertiesUnescapingProcessor : public UnescapingProcessor { +private: + wstring_convert < codecvt_utf8> convertor; // INI parser works with UTF-8 + + bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) { + if (hexLength != binLength * 2) return false; + + for (size_t i = 0; i < binLength; i++) { + uint8_t value = 0; + char a = hexadecimal[i * 2]; + char b = hexadecimal[i * 2 + 1]; + + if (a >= '0' && a <= '9') value += (a - '0')*16; + else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16; + else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16; + else return false; + + if (b >= '0' && b <= '9') value += b - '0'; + else if (b >= 'a' && b <= 'f') value += b - 'a' + 10; + else if (b >= 'A' && b <= 'F') value += b - 'A' + 10; + else return false; + + if (resultBuffer) resultBuffer[i] = value; + } + return true; + } + +public: + + std::string unescape(const std::string& s, const TextType type) override { + std::stringstream result; + for (int i = 0, length = s.size(); i < length; i++) { + char ch = s[i]; + if (i + 1 < length && ch == ESC) { + ch = s[i + 1]; + if (ch == 'u') { + // TODO: simplify, clean-up, verify (but seems working) + i++; + int hexLength = 4; + if (i + hexLength < length) { + uint16_t u16; + bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16)); + if (hexOK) result << convertor.to_bytes(ntohs(u16)); + else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX")); + i += hexLength; + } 
else { + throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters")); + } + + } else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle + else result.put(ESC); // keep the escape sequence for later unescaping phase + } else if (ch == ESC) { + throw std::logic_error(std::string("Missing escape sequence")); // this should not happen + } else { + result.put(ch); + } + } + return result.str(); + } + +}; + +} +} +} +} diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/UnescapingINIHandler.h --- a/src/lib/UnescapingINIHandler.h Sat Nov 28 00:46:40 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ -/** - * Relational pipes - * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -#pragma once - -#include - -#include "INIReader.h" - -using namespace std; -using namespace relpipe::writer; - -namespace relpipe { -namespace in { -namespace ini { -namespace lib { - -class UnescapingINIContentHandler : public INIContentHandler { -private: - INIContentHandler& output; - bool unescapeComments; - -protected: - const char ESC = '\\'; - bool lastEscaphingPhase; - - std::stringstream& put(std::stringstream& result, const char& ch, int& i) { - result.put(ch); - i++; - return result; - } - - virtual std::string unescape(const std::string& s) = 0; - -public: - - /** - * @param output here will be sent events with unescaped values - * @param lastEscaphingPhase instances of UnescapingINIContentHandler might be chained: - * unsupported escaping sequences are kept untouched to be processed in further phases; - * in the last phase, all remaining sequences (including \\) must be recognized and unescaped - * (otherwise the input is considered invalid and an exception is thrown) - */ - UnescapingINIContentHandler(INIContentHandler& output, bool lastEscaphingPhase, bool unescapeComments = false) : output(output), lastEscaphingPhase(lastEscaphingPhase), unescapeComments(unescapeComments) { - } - - void startDocument() override { - output.startDocument(); - } - - void endDocument() override { - output.endDocument(); - } - - void startSection(const SectionStartEvent& event) override { - SectionStartEvent e = event; - e.name = unescape(e.name); - if (unescapeComments) e.comment = unescape(e.comment); - output.startSection(e); - } - - void endSection() override { - output.endSection(); - } - - void entry(const EntryEvent& event) override { - EntryEvent e = event; - e.key = unescape(e.key); - e.fullKey = unescape(e.fullKey); - e.subKey = unescape(e.subKey); - e.value = unescape(e.value); - if (unescapeComments) e.comment = unescape(e.comment); - output.entry(e); - } - - void comment(const CommentEvent& event) override { - if (unescapeComments) { - CommentEvent e = 
event; - e.comment = unescape(e.comment); - output.comment(e); - } else { - output.comment(event); - } - } - - void whitespace(const WhitespaceEvent& event) override { - output.whitespace(event); - } - -}; - -} -} -} -} diff -r b1f6fa3a6555 -r e9aad9dd823a src/lib/UnescapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/UnescapingProcessor.h Sat Nov 28 18:10:47 2020 +0100 @@ -0,0 +1,60 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include + +#include "INIReader.h" + +using namespace std; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +class UnescapingProcessor { +private: +protected: + const char ESC = '\\'; + + std::stringstream& put(std::stringstream& result, const char& ch, int& i) { + result.put(ch); + i++; + return result; + } + +public: + + enum class TextType { + SectionName, + SectionComment, + SectionTag, + EntryKey, + EntryValue, + EntryComment, + Comment, + }; + + virtual std::string unescape(const std::string& s, const TextType type) = 0; + +}; + +} +} +} +}