# HG changeset patch # User František Kučera # Date 1606593569 -3600 # Node ID 596a724fbb834f2f0139bca38c9d45bdaba7e606 # Parent e9aad9dd823a2d431c3d855a57582d18a2118dad configurable dialects: in separate classes diff -r e9aad9dd823a -r 596a724fbb83 src/INICommand.cpp --- a/src/INICommand.cpp Sat Nov 28 18:10:47 2020 +0100 +++ b/src/INICommand.cpp Sat Nov 28 20:59:29 2020 +0100 @@ -27,10 +27,12 @@ #include #include "INICommand.h" +#include "lib/uri.h" #include "lib/INIReader.h" #include "lib/BasicUnescapingProcessor.h" #include "lib/BackspaceUnescapingProcessor.h" #include "lib/JavaPropertiesUnescapingProcessor.h" +#include "lib/JavaPropertiesDialect.h" using namespace std; using namespace relpipe::writer; @@ -171,10 +173,11 @@ void INICommand::process(std::istream& input, std::shared_ptr writer, Configuration& configuration) { FlatINIContentHandler handler(writer, configuration); std::shared_ptr reader(INIReader::create(input)); - reader->addUnescapingProcessor(std::make_shared(), "unescape-basic", true); - reader->addUnescapingProcessor(std::make_shared(), "unescape-java-properties", false); - reader->addUnescapingProcessor(std::make_shared(false), "unescape-backspace-disorder", false); - reader->addUnescapingProcessor(std::make_shared(), "unescape-backspace", true); + reader->addUnescapingProcessor(std::make_shared(), unescaping::Basic, true); + reader->addUnescapingProcessor(std::make_shared(), unescaping::JavaProperties, false); + reader->addUnescapingProcessor(std::make_shared(false), unescaping::BackspaceDisorder, false); + reader->addUnescapingProcessor(std::make_shared(), unescaping::Backspace, true); + reader->addDialect(std::make_shared(), dialect::JavaProperties, false); reader->addHandler(&handler); // TODO: smart pointers vs. references: are we going to call addUnescapingProcessor() dynamically/conditionally or share instances? Then pointers will be better. 
for (ParserOptionRecipe option : configuration.parserOptions) reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value)); diff -r e9aad9dd823a -r 596a724fbb83 src/lib/Dialect.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/Dialect.h Sat Nov 28 20:59:29 2020 +0100 @@ -0,0 +1,49 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#include "INIReader.h" + +using namespace std; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +class INIReader; + +/** Preset that configures an INIReader; implementations put their settings into apply(). */ +class Dialect { +public: + /** Virtual, so that an implementation can be destroyed safely through a pointer to Dialect. */ + virtual ~Dialect() = default; + + /** + * Configure the reader by calling its setOption() method. + * + * A dialect may also build on another dialect (extending or modifying it), + * but there must be no cyclic dependencies among dialects. 
+ */ + virtual void apply(INIReader& reader) = 0; + +}; + +} +} +} +} diff -r e9aad9dd823a -r 596a724fbb83 src/lib/INIReader.cpp --- a/src/lib/INIReader.cpp Sat Nov 28 18:10:47 2020 +0100 +++ b/src/lib/INIReader.cpp Sat Nov 28 20:59:29 2020 +0100 @@ -21,6 +21,7 @@ #include #include "INIReader.h" +#include "uri.h" namespace relpipe { namespace in { @@ -45,6 +46,18 @@ std::vector unescapingProcessors; + class ConfiguredDialect { + public: + std::shared_ptr dialect; + const std::string uri; + + ConfiguredDialect(std::shared_ptr dialect, const std::string uri) : dialect(dialect), uri(uri) { + } + + }; + + std::vector dialects; + /** * By default, we ignore all leading whitespace on continuing lines. * If there should be some spaces or tabs, they should be placed on the previous line before the „\“. @@ -270,21 +283,14 @@ else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)"); } - void setDialect(const std::string& name) { - if (name == "default-ini") { - // already set - } else if (name == "java-properties") { - trimLeadingSpacesOnContinuingLines = true; - allowSections = false; - allowSectionTags = false; - allowSubKeys = false; - commentSeparators = "#"; - keyValueSeparators = "=:"; - quotes = ""; - // TODO: enable unicode unescaping - } else { - throw std::invalid_argument(std::string("Unsupported INI dialect: ") + name); + void setDialect(const std::string& uri) { + for (ConfiguredDialect& d : dialects) { + if (d.uri == uri) { + d.dialect->apply(*this); + return; + } } + throw std::invalid_argument(std::string("Unsupported INI dialect: ") + uri); } bool setUnescaping(const std::string& uri, const std::string& value) { @@ -303,14 +309,14 @@ } void setOption(const std::string& uri, const std::string& value) override { - if (uri == "trim-continuing-lines") trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean - else if (uri == "allow-sections") 
allowSections = parseBoolean(value); - else if (uri == "allow-section-tags") allowSectionTags = parseBoolean(value); - else if (uri == "allow-sub-keys") allowSubKeys = parseBoolean(value); - else if (uri == "comment-separators") commentSeparators = value; - else if (uri == "key-value-separators") keyValueSeparators = value; - else if (uri == "quotes") quotes = value; - else if (uri == "dialect") setDialect(value); + if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean + else if (uri == option::AllowSections) allowSections = parseBoolean(value); + else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value); + else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value); + else if (uri == option::CommentSeparators) commentSeparators = value; + else if (uri == option::KeyValueSeparators) keyValueSeparators = value; + else if (uri == option::Quotes) quotes = value; + else if (uri == option::Dialect) setDialect(value); else if (setUnescaping(uri, value)); else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“"); } @@ -323,6 +329,11 @@ unescapingProcessors.push_back({processor, uri, enabledByDefault}); } + void addDialect(std::shared_ptr dialect, const std::string uri, bool enabledByDefault) override { + dialects.push_back({dialect, uri}); + if (enabledByDefault) dialect->apply(*this); + } + void process() override { for (INIContentHandler* handler : handlers) handler->startDocument(); diff -r e9aad9dd823a -r 596a724fbb83 src/lib/INIReader.h --- a/src/lib/INIReader.h Sat Nov 28 18:10:47 2020 +0100 +++ b/src/lib/INIReader.h Sat Nov 28 20:59:29 2020 +0100 @@ -21,6 +21,7 @@ #include "INIContentHandler.h" #include "UnescapingProcessor.h" +#include "Dialect.h" namespace relpipe { namespace in { @@ -47,6 +48,7 @@ */ virtual void addHandler(INIContentHandler* handler) = 0; virtual void 
addUnescapingProcessor(std::shared_ptr processor, const std::string uri, bool enabledByDefault) = 0; + virtual void addDialect(std::shared_ptr dialect, const std::string uri, bool enabledByDefault) = 0; virtual void process() = 0; static INIReader* create(std::istream& input); }; diff -r e9aad9dd823a -r 596a724fbb83 src/lib/JavaPropertiesDialect.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/JavaPropertiesDialect.h Sat Nov 28 20:59:29 2020 +0100 @@ -0,0 +1,49 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#pragma once + +#include "Dialect.h" +#include "uri.h" + +using namespace std; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { +/** Dialect preset named after Java .properties files; it configures the reader only through setOption(). */ +class JavaPropertiesDialect : public Dialect { +public: + /** Sets trim-continuing-lines to true, turns off sections, section tags and sub-keys, uses # as comment separator and =: as key-value separators, sets quotes to an empty string and sets unescape-java-properties to true. */ + void apply(INIReader& reader) override { + reader.setOption(option::TrimContinuingLines, "true"); + reader.setOption(option::AllowSections, "false"); + reader.setOption(option::AllowSectionTags, "false"); + reader.setOption(option::AllowSubKeys, "false"); + reader.setOption(option::CommentSeparators, "#"); + reader.setOption(option::KeyValueSeparators, "=:"); + reader.setOption(option::Quotes, ""); + // reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor + reader.setOption(unescaping::JavaProperties, "true"); + } + +}; + +} +} +} +} diff -r e9aad9dd823a -r 596a724fbb83 src/lib/uri.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/uri.h Sat Nov 28 20:59:29 2020 +0100 @@ -0,0 +1,51 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#pragma once + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +// TODO: these strings will become globally unique URIs (or IRIs) after moving to alt2xml and relative/unprefixed names should also work + +namespace option { /* names of the parser options handled by INIReader::setOption(); the pointers are const so that a constant cannot be re-pointed by accident */ +static const char* const TrimContinuingLines = "trim-continuing-lines"; +static const char* const AllowSections = "allow-sections"; +static const char* const AllowSectionTags = "allow-section-tags"; +static const char* const AllowSubKeys = "allow-sub-keys"; +static const char* const CommentSeparators = "comment-separators"; +static const char* const KeyValueSeparators = "key-value-separators"; +static const char* const Quotes = "quotes"; +static const char* const Dialect = "dialect"; +} + +namespace unescaping { /* names under which unescaping processors are registered with the reader; also passed to setOption(), see JavaPropertiesDialect */ +static const char* const Basic = "unescape-basic"; +static const char* const JavaProperties = "unescape-java-properties"; +static const char* const BackspaceDisorder = "unescape-backspace-disorder"; // TODO: remove BackspaceDisorder +static const char* const Backspace = "unescape-backspace"; +} + +namespace dialect { /* names under which dialects are registered with the reader; selected through the dialect option */ +static const char* const JavaProperties = "java-properties"; +} + +} +} +} +}