configurable dialects: in separate classes v_0
author František Kučera <franta-hg@frantovo.cz>
Sat, 28 Nov 2020 20:59:29 +0100
branch v_0
changeset 28 596a724fbb83
parent 27 e9aad9dd823a
child 29 f0bf2755249f
configurable dialects: in separate classes
src/INICommand.cpp
src/lib/Dialect.h
src/lib/INIReader.cpp
src/lib/INIReader.h
src/lib/JavaPropertiesDialect.h
src/lib/uri.h
--- a/src/INICommand.cpp	Sat Nov 28 18:10:47 2020 +0100
+++ b/src/INICommand.cpp	Sat Nov 28 20:59:29 2020 +0100
@@ -27,10 +27,12 @@
 #include <relpipe/cli/CLI.h>
 
 #include "INICommand.h"
+#include "lib/uri.h"
 #include "lib/INIReader.h"
 #include "lib/BasicUnescapingProcessor.h"
 #include "lib/BackspaceUnescapingProcessor.h"
 #include "lib/JavaPropertiesUnescapingProcessor.h"
+#include "lib/JavaPropertiesDialect.h"
 
 using namespace std;
 using namespace relpipe::writer;
@@ -171,10 +173,11 @@
 void INICommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
 	FlatINIContentHandler handler(writer, configuration);
 	std::shared_ptr<INIReader> reader(INIReader::create(input));
-	reader->addUnescapingProcessor(std::make_shared<BasicUnescapingProcessor>(), "unescape-basic", true);
-	reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), "unescape-java-properties", false);
-	reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(false), "unescape-backspace-disorder", false);
-	reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), "unescape-backspace", true);
+	reader->addUnescapingProcessor(std::make_shared<BasicUnescapingProcessor>(), unescaping::Basic, true);
+	reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), unescaping::JavaProperties, false);
+	reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(false), unescaping::BackspaceDisorder, false);
+	reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), unescaping::Backspace, true);
+	reader->addDialect(std::make_shared<JavaPropertiesDialect>(), dialect::JavaProperties, false);
 	reader->addHandler(&handler);
 	// TODO: smart pointers vs. references: are we going to call addUnescapingProcessor() dynamically/conditionally or share instances? Then pointers will be better.
 	for (ParserOptionRecipe option : configuration.parserOptions) reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value));
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/Dialect.h	Sat Nov 28 20:59:29 2020 +0100
@@ -0,0 +1,45 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "INIReader.h"
+
+using namespace std;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+class INIReader;
+
+class Dialect { // NOTE(review): polymorphic base with no virtual destructor declared — consider adding `virtual ~Dialect() = default;`
+public:
+	/**
+	 * Configures the given reader by calling its setOption() method.
+	 * 
+	 * A dialect may also be derived from another dialect (extend or modify it),
+	 * but there must be no cyclic dependencies among them.
+	 */
+	virtual void apply(INIReader& reader) = 0;
+
+};
+
+}
+}
+}
+}
--- a/src/lib/INIReader.cpp	Sat Nov 28 18:10:47 2020 +0100
+++ b/src/lib/INIReader.cpp	Sat Nov 28 20:59:29 2020 +0100
@@ -21,6 +21,7 @@
 #include <stdexcept>
 
 #include "INIReader.h"
+#include "uri.h"
 
 namespace relpipe {
 namespace in {
@@ -45,6 +46,18 @@
 
 	std::vector<ConfiguredUnescapingProcessor> unescapingProcessors;
 
+	class ConfiguredDialect { // a registered dialect together with the name that selects it
+	public:
+		std::shared_ptr<Dialect> dialect; // implementation whose apply() is invoked by setDialect()
+		const std::string uri; // compared against the name passed to setDialect()
+
+		ConfiguredDialect(std::shared_ptr<Dialect> dialect, const std::string uri) : dialect(dialect), uri(uri) {
+		}
+
+	};
+
+	std::vector<ConfiguredDialect> dialects;
+
 	/** 
 	 * By default, we ignore all leading whitespace on continuing lines.
 	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
@@ -270,21 +283,14 @@
 		else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
 	}
 
-	void setDialect(const std::string& name) {
-		if (name == "default-ini") {
-			// already set
-		} else if (name == "java-properties") {
-			trimLeadingSpacesOnContinuingLines = true;
-			allowSections = false;
-			allowSectionTags = false;
-			allowSubKeys = false;
-			commentSeparators = "#";
-			keyValueSeparators = "=:";
-			quotes = "";
-			// TODO: enable unicode unescaping
-		} else {
-			throw std::invalid_argument(std::string("Unsupported INI dialect: ") + name);
+	void setDialect(const std::string& uri) { // applies the registered dialect whose uri equals the given one
+		for (ConfiguredDialect& d : dialects) {
+			if (d.uri == uri) {
+				d.dialect->apply(*this); // hands this reader to the dialect so it can configure it
+				return; // only the first match, in registration order, is applied
+			}
 		}
+		throw std::invalid_argument(std::string("Unsupported INI dialect: ") + uri); // nothing is registered under this uri
 	}
 
 	bool setUnescaping(const std::string& uri, const std::string& value) {
@@ -303,14 +309,14 @@
 	}
 
 	void setOption(const std::string& uri, const std::string& value) override {
-		if (uri == "trim-continuing-lines") trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
-		else if (uri == "allow-sections") allowSections = parseBoolean(value);
-		else if (uri == "allow-section-tags") allowSectionTags = parseBoolean(value);
-		else if (uri == "allow-sub-keys") allowSubKeys = parseBoolean(value);
-		else if (uri == "comment-separators") commentSeparators = value;
-		else if (uri == "key-value-separators") keyValueSeparators = value;
-		else if (uri == "quotes") quotes = value;
-		else if (uri == "dialect") setDialect(value);
+		if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
+		else if (uri == option::AllowSections) allowSections = parseBoolean(value);
+		else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value);
+		else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value);
+		else if (uri == option::CommentSeparators) commentSeparators = value; // this and the next two options store the raw string, no boolean parsing
+		else if (uri == option::KeyValueSeparators) keyValueSeparators = value;
+		else if (uri == option::Quotes) quotes = value;
+		else if (uri == option::Dialect) setDialect(value); // value is a dialect name; setDialect() throws std::invalid_argument if none is registered under it
 		else if (setUnescaping(uri, value));
 		else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
 	}
@@ -323,6 +329,11 @@
 		unescapingProcessors.push_back({processor, uri, enabledByDefault});
 	}
 
+	void addDialect(std::shared_ptr<Dialect> dialect, const std::string uri, bool enabledByDefault) override { // registers a dialect under the given uri
+		dialects.push_back({dialect, uri}); // makes it selectable later through setDialect(uri)
+		if (enabledByDefault) dialect->apply(*this); // applied right away, at registration time; NOTE(review): dialect is not checked for null
+	}
+
 	void process() override {
 		for (INIContentHandler* handler : handlers) handler->startDocument();
 
--- a/src/lib/INIReader.h	Sat Nov 28 18:10:47 2020 +0100
+++ b/src/lib/INIReader.h	Sat Nov 28 20:59:29 2020 +0100
@@ -21,6 +21,7 @@
 
 #include "INIContentHandler.h"
 #include "UnescapingProcessor.h"
+#include "Dialect.h"
 
 namespace relpipe {
 namespace in {
@@ -47,6 +48,7 @@
 	 */
 	virtual void addHandler(INIContentHandler* handler) = 0;
 	virtual void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) = 0;
+	virtual void addDialect(std::shared_ptr<Dialect> dialect, const std::string uri, bool enabledByDefault) = 0; // registers a dialect under the name uri; the implementation in INIReader.cpp applies it immediately when enabledByDefault is true
 	virtual void process() = 0;
 	static INIReader* create(std::istream& input);
 };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaPropertiesDialect.h	Sat Nov 28 20:59:29 2020 +0100
@@ -0,0 +1,49 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "Dialect.h"
+#include "uri.h"
+
+using namespace std;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+class JavaPropertiesDialect : public Dialect { // registered by INICommand::process() under dialect::JavaProperties
+public:
+	/** Configures the given reader with the option values listed below, one setOption() call per option. */
+	void apply(INIReader& reader) override {
+		reader.setOption(option::TrimContinuingLines, "true");
+		reader.setOption(option::AllowSections, "false");
+		reader.setOption(option::AllowSectionTags, "false");
+		reader.setOption(option::AllowSubKeys, "false");
+		reader.setOption(option::CommentSeparators, "#"); // NOTE(review): java.util.Properties also accepts '!' as a comment start — confirm whether it belongs here
+		reader.setOption(option::KeyValueSeparators, "=:");
+		reader.setOption(option::Quotes, ""); // presumably disables quoting (empty set of quote characters) — confirm against the parser
+		// reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor
+		reader.setOption(unescaping::JavaProperties, "true"); // not an option::* name, so setOption() passes it on to setUnescaping()
+	}
+
+};
+
+}
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/uri.h	Sat Nov 28 20:59:29 2020 +0100
@@ -0,0 +1,51 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+// TODO: these strings will become globally unique URIs (or IRIs) after moving to alt2xml and relative/unprefixed names should also work
+
+namespace option { // option names that INIReader's setOption() compares its uri argument against
+static const char* TrimContinuingLines = "trim-continuing-lines"; // NOTE(review): the pointers in this namespace are not const themselves — `const char* const` would prevent accidental reassignment
+static const char* AllowSections = "allow-sections";
+static const char* AllowSectionTags = "allow-section-tags";
+static const char* AllowSubKeys = "allow-sub-keys";
+static const char* CommentSeparators = "comment-separators";
+static const char* KeyValueSeparators = "key-value-separators";
+static const char* Quotes = "quotes";
+static const char* Dialect = "dialect"; // its value is a dialect name, e.g. dialect::JavaProperties
+}
+
+namespace unescaping { // names under which INICommand::process() registers its unescaping processors
+static const char* Basic = "unescape-basic";
+static const char* JavaProperties = "unescape-java-properties";
+static const char* BackspaceDisorder = "unescape-backspace-disorder"; // TODO: remove BackspaceDisorder
+static const char* Backspace = "unescape-backspace";
+}
+
+namespace dialect { // dialect names: passed to addDialect() at registration and used as values of the "dialect" option
+static const char* JavaProperties = "java-properties";
+}
+
+}
+}
+}
+}