configurable/modular dialects and escaping v_0
author František Kučera <franta-hg@frantovo.cz>
Sat, 12 Dec 2020 00:01:57 +0100
branch v_0
changeset 3 ae8775e0bc7a
parent 2 e753a7f967c8
child 4 372b161669e4
configurable/modular dialects and escaping
bash-completion.sh
src/BasicEscapingProcessor.h
src/Dialect.h
src/EscapingProcessor.h
src/INIDispatchHandler.h
src/INIWriter.h
src/JavaManifestMFDialect.h
src/JavaPropertiesDialect.h
src/JavaPropertiesEscapingProcessor.h
src/uri.h
--- a/bash-completion.sh	Fri Dec 11 12:34:42 2020 +0100
+++ b/bash-completion.sh	Sat Dec 12 00:01:57 2020 +0100
@@ -37,6 +37,7 @@
 
 	DIALECTS=(
 		"java-properties"
+		"java-manifest-mf"
 	);
 
 	WRITER_OPTIONS=(
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/BasicEscapingProcessor.h	Sat Dec 12 00:01:57 2020 +0100
@@ -0,0 +1,54 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include <relpipe/common/type/typedefs.h>
+
+#include "EscapingProcessor.h"
+
+
+namespace relpipe {
+namespace out {
+namespace ini {
+
+/** Escaping processor that backslash-escapes backslashes, line breaks, tabs and (depending on the quoting) quotes and apostrophes. */
+class BasicEscapingProcessor : public EscapingProcessor {
+public:
+
+	/** Returns an escaped copy of s; textType is not consulted, quotingType only decides which quote characters need escaping. */
+	relpipe::common::type::StringX escape(const relpipe::common::type::StringX& s, const TextType textType, const QuotingType quotingType) override {
+		std::wstringstream result;
+		for (auto ch : s) {
+			if (ch == L'\\') result.put(ESC).put(ESC);
+			else if (ch == L'\n') result.put(ESC).put(L'n');
+			else if (ch == L'\r') result.put(ESC).put(L'r'); // was an empty statement, which silently dropped carriage returns from the output
+			else if (ch == L'\t') result.put(ESC).put(L't');
+			else if (ch == L'"' && quotingType != QuotingType::Apostrophes) result.put(ESC).put(ch); // a quote is harmless only inside apostrophes
+			else if (ch == L'\'' && quotingType != QuotingType::Quotes) result.put(ESC).put(ch); // an apostrophe is harmless only inside quotes
+			else result.put(ch);
+		}
+
+		return result.str();
+	}
+
+};
+
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Dialect.h	Sat Dec 12 00:01:57 2020 +0100
@@ -0,0 +1,44 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "INIWriter.h"
+
+namespace relpipe {
+namespace out {
+namespace ini {
+
+class INIWriter;
+
+class Dialect {
+public:
+	/**
+	 * Configures the writer by calling its setOption() method.
+	 *
+	 * A dialect may also be derived from another dialect (extending or modifying it),
+	 * but there must be no cyclic dependencies among dialects.
+	 */
+	virtual void apply(INIWriter& writer) = 0;
+	/** Virtual, so that implementations can be destroyed through a pointer to Dialect. */
+	virtual ~Dialect() {
+	}
+
+};
+
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/EscapingProcessor.h	Sat Dec 12 00:01:57 2020 +0100
@@ -0,0 +1,60 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include <relpipe/common/type/typedefs.h>
+
+
+namespace relpipe {
+namespace out {
+namespace ini {
+
+/** Interface of a pluggable escaping step: takes a piece of text and returns its escaped form. */
+class EscapingProcessor {
+protected:
+	/** The escape character; wide and shared by all instances, because implementations write it to wide-character streams. */
+	static constexpr wchar_t ESC = L'\\';
+
+public:
+	/** Says which part of the output the text being escaped belongs to. */
+	enum class TextType {
+		SectionName,
+		SectionTag,
+		SectionComment,
+		EntryKey,
+		EntrySubKey,
+		EntryValue,
+		EntryComment,
+		StandaloneComment,
+	};
+	/** Says which kind of quoting (if any) the text is subject to. */
+	enum class QuotingType {
+		None,
+		Quotes,
+		Apostrophes,
+	};
+	/** Returns the escaped form of s; textType and quotingType describe the context and may be ignored by an implementation. */
+	virtual relpipe::common::type::StringX escape(const relpipe::common::type::StringX& s, const TextType textType, const QuotingType quotingType) = 0;
+
+	virtual ~EscapingProcessor() = default;
+};
+
+}
+}
+}
--- a/src/INIDispatchHandler.h	Fri Dec 11 12:34:42 2020 +0100
+++ b/src/INIDispatchHandler.h	Sat Dec 12 00:01:57 2020 +0100
@@ -36,6 +36,11 @@
 #include "INIStandardHandler.h"
 #include "INILiteralHandler.h"
 #include "INISectionFirstHandler.h"
+#include "JavaPropertiesDialect.h"
+#include "JavaManifestMFDialect.h"
+#include "JavaPropertiesEscapingProcessor.h"
+#include "BasicEscapingProcessor.h"
+#include "uri.h"
 
 namespace relpipe {
 namespace out {
@@ -80,6 +85,10 @@
 public:
 
 	INIDispatchHandler(std::ostream& output, Configuration& configuration) : output(output), writer(output), configuration(configuration) {
+		writer.addDialect(std::make_shared<JavaPropertiesDialect>(), dialect::JavaProperties, false); // dialects are only registered here (false = not applied at registration)
+		writer.addDialect(std::make_shared<JavaManifestMFDialect>(), dialect::JavaManifestMF, false);
+		writer.addEscapingProcessor(std::make_shared<BasicEscapingProcessor>(), escaping::Basic, true); // registered as enabled
+		writer.addEscapingProcessor(std::make_shared<JavaPropertiesEscapingProcessor>(), escaping::JavaProperties, false); // registered as disabled
 		for (auto o : configuration.writerOptions) writer.setOption(o.uri, o.value);
 	}
 
--- a/src/INIWriter.h	Fri Dec 11 12:34:42 2020 +0100
+++ b/src/INIWriter.h	Sat Dec 12 00:01:57 2020 +0100
@@ -22,6 +22,10 @@
 #include <relpipe/common/type/typedefs.h>
 #include <relpipe/reader/RelpipeReaderException.h>
 
+#include "uri.h"
+#include "Dialect.h"
+#include "EscapingProcessor.h"
+
 namespace relpipe {
 namespace out {
 namespace ini {
@@ -35,34 +39,68 @@
 	std::string commentSeparatorForSections = " ; ";
 	std::string commentSeparatorForEntries = " ; ";
 	std::string commentSeparatorStandalone = "; ";
-	
+
 	bool hasContent = false;
 
-	enum class TokenType {
-		SectionName,
-		SectionTag,
-		SectionComment,
-		EntryKey,
-		EntrySubKey,
-		EntryValue,
-		EntryComment,
-		StandaloneComment,
+	/**
+	 * Accepts exactly "true" or "false"; any other text is rejected with an exception. TODO: use a common method
+	 */
+	bool parseBoolean(const relpipe::common::type::StringX& value) {
+		const bool isTrue = value == L"true";
+		if (isTrue || value == L"false") return isTrue;
+		throw relpipe::reader::RelpipeReaderException(L"Unable to parse boolean value: " + value + L" (expecting true or false)");
+	}
+
+	/** Passes the value through every enabled escaping processor, in registration order, and converts the result to bytes. */
+	std::string escape(const relpipe::common::type::StringX& value, EscapingProcessor::TextType type) {
+		relpipe::common::type::StringX result = value;
+		for (const ConfiguredEscapingProcessor& p : escapingProcessors) if (p.enbaled) result = p.processor->escape(result, type, EscapingProcessor::QuotingType::None); // by reference: no per-token copy of the shared_ptr and URI; quoting is always None here
+		return convertor.to_bytes(result);
+	}
+
+	class ConfiguredEscapingProcessor { // an escaping processor together with the name (URI) and the on/off flag it is stored with
+	public:
+		std::shared_ptr<EscapingProcessor> processor; // the implementation that does the escaping
+		const relpipe::common::type::StringX uri; // name (URI) identifying this processor
+		bool enbaled; // sic (misspelled "enabled"); other code refers to the member by this name
+
+		ConfiguredEscapingProcessor(std::shared_ptr<EscapingProcessor> processor, const relpipe::common::type::StringX uri, bool enbaled) : processor(processor), uri(uri), enbaled(enbaled) {
+		}
+
 	};
 
-	std::string escape(TokenType type, relpipe::common::type::StringX value) {
-		std::wstringstream result;
+	std::vector<ConfiguredEscapingProcessor> escapingProcessors;
 
-		for (wchar_t ch : value) {
-			if (ch == L'\\') result << "\\\\";
-			else if (ch == L'\n') result << L"\\n";
-			else if (ch == L'\r') result << L"\\r";
-			else if (ch == L'\t') result << L"\\t";
-			else if (ch == L'"') result << "\\\"";
-			else result.put(ch);
+	bool setEscaping(const relpipe::common::type::StringX& uri, const relpipe::common::type::StringX& value) {
+		// Find the first registered processor with this name; report an unknown name to the caller instead of failing.
+		auto found = escapingProcessors.begin();
+		while (found != escapingProcessors.end() && found->uri != uri) ++found;
+		if (found == escapingProcessors.end()) return false;
+		// The value is parsed only for a known name, so a bad boolean is reported only then.
+		found->enbaled = parseBoolean(value);
+		return true;
+	}
+
+	class ConfiguredDialect { // a dialect together with the name (URI) it is stored under
+	public:
+		std::shared_ptr<Dialect> dialect; // the implementation that configures the writer
+		const relpipe::common::type::StringX uri; // name (URI) identifying this dialect
+
+		ConfiguredDialect(std::shared_ptr<Dialect> dialect, const relpipe::common::type::StringX uri) : dialect(dialect), uri(uri) {
 		}
 
-		// TODO: modular escaping (like unescaping in relpipe-in-ini)
-		return convertor.to_bytes(result.str());
+	};
+
+	std::vector<ConfiguredDialect> dialects;
+
+	void setDialect(const relpipe::common::type::StringX& uri) { // applies the first registered dialect whose name equals uri
+		for (ConfiguredDialect& d : dialects) {
+			if (d.uri == uri) {
+				d.dialect->apply(*this); // the dialect gets this writer and configures it
+				return;
+			}
+		}
+		throw relpipe::reader::RelpipeReaderException(L"Unsupported INI dialect: " + uri); // reached only when no registered dialect has this name
 	}
 
 public:
@@ -99,18 +137,27 @@
 	};
 
 	void setOption(relpipe::common::type::StringX uri, relpipe::common::type::StringX value) {
-		// TODO: setOption()
-		if (uri == L"dialect");
-		else if (uri == L"comment-separator-for-sections") commentSeparatorForSections = convertor.to_bytes(value);
-		else if (uri == L"comment-separator-for-entries") commentSeparatorForEntries = convertor.to_bytes(value);
-		else if (uri == L"comment-separator-standalone") commentSeparatorStandalone = convertor.to_bytes(value);
-		else if (uri == L"key-value-separator") keyValueSeparator = convertor.to_bytes(value);
-		else if (uri == L"escape-backspace");
-		else if (uri == L"escape-basic");
-		else if (uri == L"escape-java-properties");
+		// TODO: setOption(): escaping, quotes, allow-sections
+		if (uri == option::Dialect) setDialect(value); // here the value is the name of a dialect
+		else if (uri == option::CommentSeparatorForSections) commentSeparatorForSections = convertor.to_bytes(value);
+		else if (uri == option::CommentSeparatorForEntries) commentSeparatorForEntries = convertor.to_bytes(value);
+		else if (uri == option::CommentSeparatorStandalone) commentSeparatorStandalone = convertor.to_bytes(value);
+		else if (uri == option::KeyValueSeparator) keyValueSeparator = convertor.to_bytes(value);
+		else if (uri == option::AllowSections); // accepted, but has no effect yet (empty statement)
+		else if (uri == option::Quotes); // accepted, but has no effect yet (empty statement)
+		else if (setEscaping(uri, value)); // called for its effect; a true result means the option was handled there
 		else throw relpipe::reader::RelpipeReaderException(L"Unsupported writer option: " + uri);
 	}
 
+	void addDialect(std::shared_ptr<Dialect> dialect, const relpipe::common::type::StringX uri, bool enabledByDefault) {
+		dialects.emplace_back(dialect, uri); // remember the dialect under its name
+		if (enabledByDefault) dialect->apply(*this); // optionally let it configure this writer right away
+	}
+
+	void addEscapingProcessor(std::shared_ptr<EscapingProcessor> processor, const relpipe::common::type::StringX uri, bool enabledByDefault) {
+		escapingProcessors.emplace_back(processor, uri, enabledByDefault); // appended to the list; enabledByDefault becomes its initial on/off state
+	}
+
 	void startDocument() {
 	}
 
@@ -120,9 +167,9 @@
 
 	void startSection(const SectionStartEvent& event) {
 		if (hasContent) output << std::endl;
-		output << "[" << escape(TokenType::SectionName, event.name) << "]";
-		if (event.tag.size()) output << "[" << escape(TokenType::SectionTag, event.tag) << "]";
-		if (event.comment.size()) output << commentSeparatorForSections << escape(TokenType::SectionComment, event.comment);
+		output << "[" << escape(event.name, EscapingProcessor::TextType::SectionName) << "]";
+		if (event.tag.size()) output << "[" << escape(event.tag, EscapingProcessor::TextType::SectionTag) << "]";
+		if (event.comment.size()) output << commentSeparatorForSections << escape(event.comment, EscapingProcessor::TextType::SectionComment);
 		output << std::endl;
 		hasContent = true;
 	}
@@ -132,16 +179,16 @@
 	}
 
 	void entry(const EntryEvent& event) {
-		output << escape(TokenType::EntryKey, event.key);
-		if (event.subKey.size()) output << "[" << escape(TokenType::EntrySubKey, event.subKey) << "]";
-		output << keyValueSeparator << escape(TokenType::EntryValue, event.value);
-		if (event.comment.size()) output << commentSeparatorForEntries << escape(TokenType::EntryComment, event.comment);
+		output << escape(event.key, EscapingProcessor::TextType::EntryKey);
+		if (event.subKey.size()) output << "[" << escape(event.subKey, EscapingProcessor::TextType::EntrySubKey) << "]";
+		output << keyValueSeparator << escape(event.value, EscapingProcessor::TextType::EntryValue);
+		if (event.comment.size()) output << commentSeparatorForEntries << escape(event.comment, EscapingProcessor::TextType::EntryComment);
 		output << std::endl;
 		hasContent = true;
 	}
 
 	void comment(const CommentEvent& event) {
-		output << commentSeparatorStandalone << escape(TokenType::StandaloneComment, event.comment);
+		output << commentSeparatorStandalone << escape(event.comment, EscapingProcessor::TextType::StandaloneComment);
 		output << std::endl;
 		hasContent = true;
 	}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/JavaManifestMFDialect.h	Sat Dec 12 00:01:57 2020 +0100
@@ -0,0 +1,40 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "Dialect.h"
+#include "uri.h"
+
+namespace relpipe {
+namespace out {
+namespace ini {
+
+class INIWriter;
+
+class JavaManifestMFDialect : public Dialect {
+public:
+	/** Selects the java-properties dialect through the dialect option and then sets the key-value separator to ": ". */
+	void apply(INIWriter& writer) override {
+		writer.setOption(option::Dialect, dialect::JavaProperties); // base dialect first ...
+		writer.setOption(option::KeyValueSeparator, L": "); // ... then the separator, so that this value is the one set last
+	}
+
+};
+
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/JavaPropertiesDialect.h	Sat Dec 12 00:01:57 2020 +0100
@@ -0,0 +1,46 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "Dialect.h"
+#include "uri.h"
+
+namespace relpipe {
+namespace out {
+namespace ini {
+
+class INIWriter;
+
+class JavaPropertiesDialect : public Dialect {
+public:
+	/** Configures the writer through its options: no sections, no quotes, "=" separator, "# " standalone comments, java-properties escaping. */
+	void apply(INIWriter& writer) override {
+		writer.setOption(option::AllowSections, L"false");
+		writer.setOption(option::Quotes, L"");
+		writer.setOption(option::KeyValueSeparator, L"=");
+		writer.setOption(option::CommentSeparatorForSections, L"");
+		writer.setOption(option::CommentSeparatorForEntries, L"");
+		writer.setOption(option::CommentSeparatorStandalone, L"# ");
+		// writer.setOption(escaping::Basic, L"false"); // TODO: basic vs. .properties
+		writer.setOption(escaping::JavaProperties, L"true"); // the basic escaping option is not touched here (see the TODO above)
+	}
+
+};
+
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/JavaPropertiesEscapingProcessor.h	Sat Dec 12 00:01:57 2020 +0100
@@ -0,0 +1,51 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <sstream>
+
+#include <relpipe/common/type/typedefs.h>
+
+#include "EscapingProcessor.h"
+
+
+namespace relpipe {
+namespace out {
+namespace ini {
+
+class JavaPropertiesEscapingProcessor : public EscapingProcessor {
+private:
+public:
+	/** Returns a copy of s with backslashes doubled and line feeds written as ESC + 'n'; textType and quotingType are not consulted. */
+	relpipe::common::type::StringX escape(const relpipe::common::type::StringX& s, const TextType textType, const QuotingType quotingType) override {
+		std::wstringstream escaped;
+		for (const auto character : s) {
+			switch (character) {
+				case L'\\': escaped.put(ESC).put(ESC); break;
+				case L'\n': escaped.put(ESC).put(L'n'); break;
+				// TODO: escape unicode
+				default: escaped.put(character);
+			}
+		}
+		return escaped.str();
+	}
+
+};
+
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/uri.h	Sat Dec 12 00:01:57 2020 +0100
@@ -0,0 +1,53 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+namespace relpipe {
+namespace out {
+namespace ini {
+
+// TODO: these strings will become globally unique URIs (or IRIs) after moving to alt2xml and relative/unprefixed names should also work
+
+/** general options of the INI writer; the pointers themselves are const, so a name cannot be re-pointed to another string */
+namespace option {
+static const wchar_t* const AllowSections = L"allow-sections";
+// static const wchar_t* const AllowSectionTags = L"allow-section-tags";
+// static const wchar_t* const AllowSubKeys = L"allow-sub-keys";
+static const wchar_t* const CommentSeparatorForSections = L"comment-separator-for-sections";
+static const wchar_t* const CommentSeparatorForEntries = L"comment-separator-for-entries";
+static const wchar_t* const CommentSeparatorStandalone = L"comment-separator-standalone";
+static const wchar_t* const KeyValueSeparator = L"key-value-separator";
+static const wchar_t* const Quotes = L"quotes";
+static const wchar_t* const Dialect = L"dialect";
+}
+
+/** names of dynamically registered escaping processors; they are also options (const pointers: the names cannot be re-pointed) */
+namespace escaping {
+static const wchar_t* const Basic = L"escape-basic";
+static const wchar_t* const JavaProperties = L"escape-java-properties";
+static const wchar_t* const Backspace = L"escape-backspace";
+}
+
+/** not options, but values of the dialect option (const pointers: the names cannot be re-pointed) */
+namespace dialect {
+static const wchar_t* const JavaProperties = L"java-properties";
+static const wchar_t* const JavaManifestMF = L"java-manifest-mf";
+}
+
+}
+}
+}