java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini) v_0
author František Kučera <franta-hg@frantovo.cz>
Sun, 31 Oct 2021 17:30:40 +0100
branch v_0
changeset 33 c9a158da6c32
parent 32 e72546725c77
child 34 4b8641293b83
java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
bash-completion.sh
src/XMLDocumentConstructor.h
src/lib/BasicUnescapingProcessor.h
src/lib/Dialect.h
src/lib/INIReader.cpp
src/lib/JavaManifestMFDialect.h
src/lib/JavaPropertiesDialect.h
src/lib/UnescapingProcessor.h
src/lib/uri.h
--- a/bash-completion.sh	Mon Nov 30 00:12:16 2020 +0100
+++ b/bash-completion.sh	Sun Oct 31 17:30:40 2021 +0100
@@ -62,6 +62,8 @@
 	# TODO: introspection: after moving to alt2xml the available options and their values should be provided by the parser
 
 	PARSER_OPTIONS=(
+		"allow-line-continuation-with-escaping"
+		"allow-line-continuation-with-space"
 		"trim-continuing-lines"
 		"allow-sections"
 		"allow-section-tags"
@@ -85,6 +87,7 @@
 
 	DIALECTS=(
 		"java-properties"
+		"java-manifest-mf"
 	);
 
 	TREE_STYLES=(
@@ -111,6 +114,8 @@
 	elif [[ "$w3" == "--raw-xml-attribute-wrapper"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
 
 	elif [[ "$w1" == "--parser-option"                                  ]];    then COMPREPLY=($(compgen -W "${PARSER_OPTIONS[*]}" -- "$w0"))
+	elif [[ "$w2" == "--parser-option" && "$w1" == "allow-line-continuation-with-escaping"    ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+	elif [[ "$w2" == "--parser-option" && "$w1" == "allow-line-continuation-with-space"       ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	elif [[ "$w2" == "--parser-option" && "$w1" == "trim-continuing-lines"                    ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	elif [[ "$w2" == "--parser-option" && "$w1" == "allow-sections"                           ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	elif [[ "$w2" == "--parser-option" && "$w1" == "allow-section-tags"                       ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
--- a/src/XMLDocumentConstructor.h	Mon Nov 30 00:12:16 2020 +0100
+++ b/src/XMLDocumentConstructor.h	Sun Oct 31 17:30:40 2021 +0100
@@ -25,6 +25,7 @@
 #include "lib/BackspaceUnescapingProcessor.h"
 #include "lib/JavaPropertiesUnescapingProcessor.h"
 #include "lib/JavaPropertiesDialect.h"
+#include "lib/JavaManifestMFDialect.h"
 #include "lib/XMLNameCodec.h"
 
 using namespace relpipe::in::ini::lib;
@@ -161,6 +162,7 @@
 		reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), unescaping::JavaProperties, false);
 		reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), unescaping::Backspace, true);
 		reader->addDialect(std::make_shared<JavaPropertiesDialect>(), dialect::JavaProperties, false);
+		reader->addDialect(std::make_shared<JavaManifestMFDialect>(), dialect::JavaManifestMF, false);
 		handler = std::make_shared<HierarchicalINIContentHandler>(parser);
 	}
 
--- a/src/lib/BasicUnescapingProcessor.h	Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/BasicUnescapingProcessor.h	Sun Oct 31 17:30:40 2021 +0100
@@ -38,11 +38,12 @@
 			if (i + 1 < length && ch == ESC) {
 				ch = s[i + 1];
 				if (ch == 'n') put(result, '\n', i);
-				else if (ch == 'r') put(result, '\r', i);
+				else if (ch == 'r') put(result, '\r', i); // TODO: should be 0x0d, not 0x0a like \n
 				else if (ch == 't') put(result, '\t', i);
 				else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what is „basic“ escaping and should be supported.
 				else if (ch == '"') put(result, ch, i); //        The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to given environment is unescaped, e.g. \" in "quoted" value).
 				else if (ch == '\'') put(result, ch, i); //       So it does not necessary to do it here. But someone might write a="xxx\'zzz" however it is superfluous because a="xxx'zzz" will also work.
+				else if (ch == '[') put(result, ch, i);
 				else if (ch == ']') put(result, ch, i);
 				else if (ch == ':') put(result, ch, i);
 				else if (ch == ';') put(result, ch, i);
--- a/src/lib/Dialect.h	Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/Dialect.h	Sun Oct 31 17:30:40 2021 +0100
@@ -18,8 +18,6 @@
 
 #include "INIReader.h"
 
-using namespace std;
-
 namespace relpipe {
 namespace in {
 namespace ini {
@@ -37,6 +35,9 @@
 	 */
 	virtual void apply(INIReader& reader) = 0;
 
+	/** Virtual, so that dialect implementations can be destroyed through a pointer to this base class. */
+	virtual ~Dialect() = default;
+
 };
 
 }
--- a/src/lib/INIReader.cpp	Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/INIReader.cpp	Sun Oct 31 17:30:40 2021 +0100
@@ -58,6 +58,20 @@
 
 	std::vector<ConfiguredDialect> dialects;
 
+	/**
+	 * If there is a „\“ backslash at the end of a physical line, the logical line continues on the next physical line.
+	 *
+	 * Disabling this option makes sense only if we also disable the unescaping processors (unescape-basic, unescape-backspace).
+	 * Otherwise they will complain about „Missing escape sequence“ because they got „\“ at the end of the value.
+	 */
+	bool allowLineContinuationsWithEscaping = true;
+
+	/**
+	 * If a line starts with a space, it is a continuation of the previous line.
+	 * This rule conflicts with the default behavior of ignoring such insignificant whitespace and is quite specific to the Java MANIFEST.MF dialect.
+	 */
+	bool allowLineContinuationsWithSpace = false;
+
 	/** 
 	 * By default, we ignore all leading whitespace on continuing lines.
 	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
@@ -188,8 +202,19 @@
 	std::string readUntil(const std::string& until, bool* found = nullptr) {
 		std::stringstream result;
 
-		for (char ch = peek(); input.good() && !oneOf(ch, until); ch = peek()) {
-			if (ch == '\\') {
+		for (char ch = peek(); input.good(); ch = peek()) {
+			if (allowLineContinuationsWithSpace && ch == '\n') {
+				get();
+				ch = peek();
+				if (ch == ' ') get();
+				else if (ch == std::istream::traits_type::eof()) break;
+				else {
+					if (found) *found = true;
+					return result.str();
+				}
+			} else if (oneOf(ch, until)) {
+				break;
+			} else if (allowLineContinuationsWithEscaping && ch == '\\') {
 				get();
 				ch = get();
 				if (oneOf(ch, until) && ch == '\n') processContinuingLine(result);
@@ -309,7 +334,9 @@
 	}
 
 	void setOption(const std::string& uri, const std::string& value) override {
-		if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
+		if (uri == option::AllowLineContinuationWithEscaping) allowLineContinuationsWithEscaping = parseBoolean(value);
+		else if (uri == option::AllowLineContinuationWithSpace) allowLineContinuationsWithSpace = parseBoolean(value);
+		else if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value);
 		else if (uri == option::AllowSections) allowSections = parseBoolean(value);
 		else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value);
 		else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaManifestMFDialect.h	Sun Oct 31 17:30:40 2021 +0100
@@ -0,0 +1,51 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "Dialect.h"
+#include "uri.h"
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/** Dialect that presets the INIReader options for parsing Java MANIFEST.MF files. */
+class JavaManifestMFDialect : public Dialect {
+public:
+
+	/** Configures the given reader: „:“ separated entries, no sections, no sub-keys, no quotes, continuation lines marked by a leading space. */
+	void apply(INIReader& reader) override {
+		reader.setOption(option::TrimContinuingLines, "true");
+		reader.setOption(option::AllowSections, "false");
+		reader.setOption(option::AllowSectionTags, "false");
+		reader.setOption(option::AllowSubKeys, "false");
+		reader.setOption(option::CommentSeparators, "#");
+		reader.setOption(option::KeyValueSeparators, ":"); // only „:“ separates the key from the value
+		reader.setOption(option::Quotes, ""); // empty list: no quote characters are recognized
+		reader.setOption(option::AllowLineContinuationWithEscaping, "false"); // a trailing „\“ does not continue the line
+		reader.setOption(option::AllowLineContinuationWithSpace, "true"); // a physical line starting with a space continues the previous one
+		// reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor
+		reader.setOption(unescaping::JavaProperties, "true");
+	}
+
+};
+
+}
+}
+}
+}
--- a/src/lib/JavaPropertiesDialect.h	Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/JavaPropertiesDialect.h	Sun Oct 31 17:30:40 2021 +0100
@@ -34,9 +34,11 @@
 		reader.setOption(option::AllowSections, "false");
 		reader.setOption(option::AllowSectionTags, "false");
 		reader.setOption(option::AllowSubKeys, "false");
-		reader.setOption(option::CommentSeparators, "#");
-		reader.setOption(option::KeyValueSeparators, "=:");
+		reader.setOption(option::CommentSeparators, "#!");
+		reader.setOption(option::KeyValueSeparators, "=");
 		reader.setOption(option::Quotes, "");
+		reader.setOption(option::AllowLineContinuationWithEscaping, "true");
+		reader.setOption(option::AllowLineContinuationWithSpace, "false");
 		// reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor
 		reader.setOption(unescaping::JavaProperties, "true");
 	}
--- a/src/lib/UnescapingProcessor.h	Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/UnescapingProcessor.h	Sun Oct 31 17:30:40 2021 +0100
@@ -20,8 +20,6 @@
 
 #include "INIReader.h"
 
-using namespace std;
-
 namespace relpipe {
 namespace in {
 namespace ini {
@@ -52,6 +50,8 @@
 
 	virtual std::string unescape(const std::string& s, const TextType type) = 0;
 
+	/** Virtual, so that processor implementations can be destroyed through a pointer to this base class. */
+	virtual ~UnescapingProcessor() = default;
 };
 
 }
--- a/src/lib/uri.h	Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/uri.h	Sun Oct 31 17:30:40 2021 +0100
@@ -25,6 +25,8 @@
 
 /** general options of the INI parser */
 namespace option {
+static const char* AllowLineContinuationWithEscaping = "allow-line-continuation-with-escaping";
+static const char* AllowLineContinuationWithSpace = "allow-line-continuation-with-space";
 static const char* TrimContinuingLines = "trim-continuing-lines";
 static const char* AllowSections = "allow-sections";
 static const char* AllowSectionTags = "allow-section-tags";
@@ -45,6 +47,7 @@
 /** not options but a values of the dialect option */
 namespace dialect {
 static const char* JavaProperties = "java-properties";
+static const char* JavaManifestMF = "java-manifest-mf";
 }
 
 /** options for configuring the stage where events from the INI parser are converted to SAX events or DOM building */