configurable dialects: in separate classes v_0
author František Kučera <franta-hg@frantovo.cz>
Sat, 28 Nov 2020 21:09:18 +0100
branch v_0
changeset 29 06aaad12c207
parent 28 0e7c57d48d1e
child 30 f686bdaeb9e0
configurable dialects: in separate classes
src/XMLDocumentConstructor.h
src/lib/BackspaceUnescapingProcessor.h
src/lib/Dialect.h
src/lib/INIReader.cpp
src/lib/INIReader.h
src/lib/JavaPropertiesDialect.h
src/lib/uri.h
--- a/src/XMLDocumentConstructor.h	Sat Nov 28 18:14:15 2020 +0100
+++ b/src/XMLDocumentConstructor.h	Sat Nov 28 21:09:18 2020 +0100
@@ -19,10 +19,12 @@
 #include <stdexcept>
 #include <libxml++-2.6/libxml++/libxml++.h>
 
+#include "lib/uri.h"
 #include "lib/INIReader.h"
 #include "lib/BasicUnescapingProcessor.h"
 #include "lib/BackspaceUnescapingProcessor.h"
 #include "lib/JavaPropertiesUnescapingProcessor.h"
+#include "lib/JavaPropertiesDialect.h"
 #include "lib/XMLNameCodec.h"
 
 using namespace relpipe::in::ini::lib;
@@ -112,10 +114,10 @@
 	void process() {
 		HierarchicalINIContentHandler handler(parser);
 		std::shared_ptr<INIReader> reader(INIReader::create(*input));
-		reader->addUnescapingProcessor(std::make_shared<BasicUnescapingProcessor>(), "unescape-basic", true);
-		reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), "unescape-java-properties", false);
-		reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(false), "unescape-backspace-disorder", false);
-		reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), "unescape-backspace", true);
+		reader->addUnescapingProcessor(std::make_shared<BasicUnescapingProcessor>(), unescaping::Basic, true);
+		reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), unescaping::JavaProperties, false);
+		reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), unescaping::Backspace, true);
+		reader->addDialect(std::make_shared<JavaPropertiesDialect>(), dialect::JavaProperties, false);
 		reader->addHandler(&handler);
 		// TODO: smart pointers vs. references: are we going to call addUnescapingProcessor() dynamically/conditionally or share instances? Then pointers will be better.
 		// TODO: call setOption() according to the configuration
--- a/src/lib/BackspaceUnescapingProcessor.h	Sat Nov 28 18:14:15 2020 +0100
+++ b/src/lib/BackspaceUnescapingProcessor.h	Sat Nov 28 21:09:18 2020 +0100
@@ -30,6 +30,14 @@
 
 class BackspaceUnescapingProcessor : public UnescapingProcessor {
 private:
+	/**
+	 * By default it is set to true, thus no unrecognized escape sequences may be left after this stage.
+	 * Setting this to false is dangerous and may lead to errors and ambiguous behavior.
+	 * It should be used only as a last resort,
+	 * because both "\\ \xxx" and "\ \xxx" will be converted to "\ \xxx" and the information will be lost.
+	 * So, it is usually better to keep the "\" escaped as "\\" and process both the escaped backslashes and unrecognized escape sequences later.
+	 */
+
 	const bool lastEscaphingPhase = true;
 public:
 
@@ -51,17 +59,6 @@
 		return result.str();
 	}
 
-	/**
-	 * @param lastEscaphingPhase whether this is final unescaping stage.
-	 * By default it is set to true, thus no unrecognized escape sequences may left after this stage.
-	 * Setting this to false is dangerous and may lead to errors and ambiguous behavior.
-	 * It should be used only as a last resort.
-	 * Because both "\\ \xxx" and "\ \xxx" will be converted to "\ \xxx" and the information will be lost.
-	 * So, it is usually better to keep the "\" escaped as "\\" and process both the escaped backspaces and unrecognized escape sequences later.
-	 */
-	BackspaceUnescapingProcessor(bool lastEscaphingPhase = true) : lastEscaphingPhase(lastEscaphingPhase) {
-	}
-
 };
 
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/Dialect.h	Sat Nov 28 21:09:18 2020 +0100
@@ -0,0 +1,45 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "INIReader.h"
+
+using namespace std;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+class INIReader;
+
+/** Abstract preset of parser options that can be applied to an INIReader. */
+class Dialect {
+public:
+	/** Virtual, so that a dialect can be safely destroyed through a pointer to this base class. */
+	virtual ~Dialect() = default;
+	/**
+	 * Configure the reader by calling its setOption() method.
+	 * A dialect may also build on another dialect (extend or modify it), but there must be no cyclic dependencies among them.
+	 */
+	virtual void apply(INIReader& reader) = 0;
+};
+
+}
+}
+}
+}
--- a/src/lib/INIReader.cpp	Sat Nov 28 18:14:15 2020 +0100
+++ b/src/lib/INIReader.cpp	Sat Nov 28 21:09:18 2020 +0100
@@ -21,6 +21,7 @@
 #include <stdexcept>
 
 #include "INIReader.h"
+#include "uri.h"
 
 namespace relpipe {
 namespace in {
@@ -45,6 +46,18 @@
 
 	std::vector<ConfiguredUnescapingProcessor> unescapingProcessors;
 
+	class ConfiguredDialect { // a registered dialect together with the name (URI) it was registered under
+	public:
+		std::shared_ptr<Dialect> dialect; // applied to this reader when the dialect is selected
+		const std::string uri; // matched by exact string comparison in setDialect()
+
+		ConfiguredDialect(std::shared_ptr<Dialect> dialect, const std::string uri) : dialect(dialect), uri(uri) {
+		}
+
+	};
+
+	std::vector<ConfiguredDialect> dialects; // filled by addDialect(), searched front to back by setDialect()
+
 	/** 
 	 * By default, we ignore all leading whitespace on continuing lines.
 	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
@@ -270,21 +283,14 @@
 		else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
 	}
 
-	void setDialect(const std::string& name) {
-		if (name == "default-ini") {
-			// already set
-		} else if (name == "java-properties") {
-			trimLeadingSpacesOnContinuingLines = true;
-			allowSections = false;
-			allowSectionTags = false;
-			allowSubKeys = false;
-			commentSeparators = "#";
-			keyValueSeparators = "=:";
-			quotes = "";
-			// TODO: enable unicode unescaping
-		} else {
-			throw std::invalid_argument(std::string("Unsupported INI dialect: ") + name);
+	void setDialect(const std::string& uri) { // applies the first registered dialect whose URI equals the given one
+		for (ConfiguredDialect& d : dialects) { // registration order decides when the same URI was added twice
+			if (d.uri == uri) {
+				d.dialect->apply(*this); // the dialect configures this reader through setOption()
+				return;
+			}
 		}
+		throw std::invalid_argument(std::string("Unsupported INI dialect: ") + uri); // NOTE(review): "default-ini" was accepted by the removed code; now it throws unless a dialect is registered under that name
 	}
 
 	bool setUnescaping(const std::string& uri, const std::string& value) {
@@ -303,14 +309,14 @@
 	}
 
 	void setOption(const std::string& uri, const std::string& value) override {
-		if (uri == "trim-continuing-lines") trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
-		else if (uri == "allow-sections") allowSections = parseBoolean(value);
-		else if (uri == "allow-section-tags") allowSectionTags = parseBoolean(value);
-		else if (uri == "allow-sub-keys") allowSubKeys = parseBoolean(value);
-		else if (uri == "comment-separators") commentSeparators = value;
-		else if (uri == "key-value-separators") keyValueSeparators = value;
-		else if (uri == "quotes") quotes = value;
-		else if (uri == "dialect") setDialect(value);
+		if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
+		else if (uri == option::AllowSections) allowSections = parseBoolean(value);
+		else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value);
+		else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value);
+		else if (uri == option::CommentSeparators) commentSeparators = value; // stored verbatim, no boolean parsing
+		else if (uri == option::KeyValueSeparators) keyValueSeparators = value; // stored verbatim
+		else if (uri == option::Quotes) quotes = value; // stored verbatim
+		else if (uri == option::Dialect) setDialect(value); // applies a whole preset; the dialect calls back into setOption()
 		else if (setUnescaping(uri, value));
 		else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
 	}
@@ -323,6 +329,11 @@
 		unescapingProcessors.push_back({processor, uri, enabledByDefault});
 	}
 
+	void addDialect(std::shared_ptr<Dialect> dialect, const std::string uri, bool enabledByDefault) override { // registers a dialect under the given name
+		dialects.push_back({dialect, uri}); // from now on selectable through setDialect(uri)
+		if (enabledByDefault) dialect->apply(*this); // optionally applied right away, at registration time
+	}
+
 	void process() override {
 		for (INIContentHandler* handler : handlers) handler->startDocument();
 
--- a/src/lib/INIReader.h	Sat Nov 28 18:14:15 2020 +0100
+++ b/src/lib/INIReader.h	Sat Nov 28 21:09:18 2020 +0100
@@ -21,6 +21,7 @@
 
 #include "INIContentHandler.h"
 #include "UnescapingProcessor.h"
+#include "Dialect.h"
 
 namespace relpipe {
 namespace in {
@@ -47,6 +48,7 @@
 	 */
 	virtual void addHandler(INIContentHandler* handler) = 0;
 	virtual void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) = 0;
+	virtual void addDialect(std::shared_ptr<Dialect> dialect, const std::string uri, bool enabledByDefault) = 0;
 	virtual void process() = 0;
 	static INIReader* create(std::istream& input);
 };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaPropertiesDialect.h	Sat Nov 28 21:09:18 2020 +0100
@@ -0,0 +1,49 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "Dialect.h"
+#include "uri.h"
+
+using namespace std;
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+class JavaPropertiesDialect : public Dialect { // option preset registered by the caller under dialect::JavaProperties
+public:
+	/** Sets the reader options below one by one; an exception thrown by setOption() leaves the earlier ones applied. */
+	void apply(INIReader& reader) override {
+		reader.setOption(option::TrimContinuingLines, "true");
+		reader.setOption(option::AllowSections, "false"); // no sections, section tags or sub-keys in this dialect
+		reader.setOption(option::AllowSectionTags, "false");
+		reader.setOption(option::AllowSubKeys, "false");
+		reader.setOption(option::CommentSeparators, "#");
+		reader.setOption(option::KeyValueSeparators, "=:"); // two separator characters: '=' and ':'
+		reader.setOption(option::Quotes, ""); // empty string: no quote characters
+		// reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor
+		reader.setOption(unescaping::JavaProperties, "true"); // turns on the processor named "unescape-java-properties"
+	}
+
+};
+
+}
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/uri.h	Sat Nov 28 21:09:18 2020 +0100
@@ -0,0 +1,50 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+// TODO: these strings will become globally unique URIs (or IRIs) after moving to alt2xml and relative/unprefixed names should also work
+
+namespace option { // names of the parser options understood by INIReader::setOption(); const pointers, so they cannot be reassigned
+static const char* const TrimContinuingLines = "trim-continuing-lines";
+static const char* const AllowSections = "allow-sections";
+static const char* const AllowSectionTags = "allow-section-tags";
+static const char* const AllowSubKeys = "allow-sub-keys";
+static const char* const CommentSeparators = "comment-separators";
+static const char* const KeyValueSeparators = "key-value-separators";
+static const char* const Quotes = "quotes";
+static const char* const Dialect = "dialect";
+}
+
+namespace unescaping { // names under which the unescaping processors are registered and toggled; const pointers, not reassignable
+static const char* const Basic = "unescape-basic";
+static const char* const JavaProperties = "unescape-java-properties";
+static const char* const Backspace = "unescape-backspace";
+}
+
+namespace dialect { // names of the selectable dialects (values of the "dialect" option); const pointer, not reassignable
+static const char* const JavaProperties = "java-properties";
+}
+
+}
+}
+}
+}