java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
--- a/bash-completion.sh Mon Nov 30 00:12:16 2020 +0100
+++ b/bash-completion.sh Sun Oct 31 17:30:40 2021 +0100
@@ -62,6 +62,8 @@
# TODO: introspection: after moving to alt2xml the available options and their values should be provided by the parser
PARSER_OPTIONS=(
+ "allow-line-continuation-with-escaping"
+ "allow-line-continuation-with-space"
"trim-continuing-lines"
"allow-sections"
"allow-section-tags"
@@ -85,6 +87,7 @@
DIALECTS=(
"java-properties"
+ "java-manifest-mf"
);
TREE_STYLES=(
@@ -111,6 +114,8 @@
elif [[ "$w3" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''")
elif [[ "$w1" == "--parser-option" ]]; then COMPREPLY=($(compgen -W "${PARSER_OPTIONS[*]}" -- "$w0"))
+ elif [[ "$w2" == "--parser-option" && "$w1" == "allow-line-continuation-with-escaping" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+ elif [[ "$w2" == "--parser-option" && "$w1" == "allow-line-continuation-with-space" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
elif [[ "$w2" == "--parser-option" && "$w1" == "trim-continuing-lines" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
elif [[ "$w2" == "--parser-option" && "$w1" == "allow-sections" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
elif [[ "$w2" == "--parser-option" && "$w1" == "allow-section-tags" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
--- a/src/XMLDocumentConstructor.h Mon Nov 30 00:12:16 2020 +0100
+++ b/src/XMLDocumentConstructor.h Sun Oct 31 17:30:40 2021 +0100
@@ -25,6 +25,7 @@
#include "lib/BackspaceUnescapingProcessor.h"
#include "lib/JavaPropertiesUnescapingProcessor.h"
#include "lib/JavaPropertiesDialect.h"
+#include "lib/JavaManifestMFDialect.h"
#include "lib/XMLNameCodec.h"
using namespace relpipe::in::ini::lib;
@@ -161,6 +162,7 @@
reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), unescaping::JavaProperties, false);
reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), unescaping::Backspace, true);
reader->addDialect(std::make_shared<JavaPropertiesDialect>(), dialect::JavaProperties, false);
+ reader->addDialect(std::make_shared<JavaManifestMFDialect>(), dialect::JavaManifestMF, false);
handler = std::make_shared<HierarchicalINIContentHandler>(parser);
}
--- a/src/lib/BasicUnescapingProcessor.h Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/BasicUnescapingProcessor.h Sun Oct 31 17:30:40 2021 +0100
@@ -38,11 +38,12 @@
if (i + 1 < length && ch == ESC) {
ch = s[i + 1];
if (ch == 'n') put(result, '\n', i);
- else if (ch == 'r') put(result, '\r', i);
+ else if (ch == 'r') put(result, '\r', i); // TODO: should be 0x0d, not 0x0a like \n
else if (ch == 't') put(result, '\t', i);
else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what is „basic“ escaping and should be supported.
else if (ch == '"') put(result, ch, i); // The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to given environment is unescaped, e.g. \" in "quoted" value).
else if (ch == '\'') put(result, ch, i); // So it does not necessary to do it here. But someone might write a="xxx\'zzz" however it is superfluous because a="xxx'zzz" will also work.
+ else if (ch == '[') put(result, ch, i);
else if (ch == ']') put(result, ch, i);
else if (ch == ':') put(result, ch, i);
else if (ch == ';') put(result, ch, i);
--- a/src/lib/Dialect.h Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/Dialect.h Sun Oct 31 17:30:40 2021 +0100
@@ -18,8 +18,6 @@
#include "INIReader.h"
-using namespace std;
-
namespace relpipe {
namespace in {
namespace ini {
@@ -37,6 +35,9 @@
*/
virtual void apply(INIReader& reader) = 0;
+ virtual ~Dialect() { // virtual, so that a derived dialect can be destroyed correctly through a pointer to Dialect
+ }
+
};
}
--- a/src/lib/INIReader.cpp Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/INIReader.cpp Sun Oct 31 17:30:40 2021 +0100
@@ -58,6 +58,20 @@
std::vector<ConfiguredDialect> dialects;
+ /**
+ * If there is a „\“ backslash at the end of a physical line, the logical line continues on the next physical line.
+ *
+ * Disabling this option makes sense only if we also disable the unescaping processors (unescape-basic, unescape-backspace).
+ * Otherwise they will complain about „Missing escape sequence“ because they got „\“ at the end of the value.
+ */
+ bool allowLineContinuationsWithEscaping = true;
+
+ /**
+ * If a line starts with a space, it is a continuation of the previous line.
+ * This rule conflicts with the default behavior of ignoring such insignificant whitespace and is quite specific to the Java MANIFEST.MF dialect.
+ */
+ bool allowLineContinuationsWithSpace = false;
+
/**
* By default, we ignore all leading whitespace on continuing lines.
* If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
@@ -188,8 +202,19 @@
std::string readUntil(const std::string& until, bool* found = nullptr) {
std::stringstream result;
- for (char ch = peek(); input.good() && !oneOf(ch, until); ch = peek()) {
- if (ch == '\\') {
+ for (char ch = peek(); input.good(); ch = peek()) {
+ if (allowLineContinuationsWithSpace && ch == '\n') {
+ get();
+ ch = peek();
+ if (ch == ' ') get();
+ else if (ch == std::istream::traits_type::eof()) break;
+ else {
+ if (found) *found = true;
+ return result.str();
+ }
+ } else if (oneOf(ch, until)) {
+ break;
+ } else if (allowLineContinuationsWithEscaping && ch == '\\') {
get();
ch = get();
if (oneOf(ch, until) && ch == '\n') processContinuingLine(result);
@@ -309,7 +334,9 @@
}
void setOption(const std::string& uri, const std::string& value) override {
- if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
+ if (uri == option::AllowLineContinuationWithEscaping) allowLineContinuationsWithEscaping = parseBoolean(value);
+ else if (uri == option::AllowLineContinuationWithSpace) allowLineContinuationsWithSpace = parseBoolean(value);
+ else if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value);
else if (uri == option::AllowSections) allowSections = parseBoolean(value);
else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value);
else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/JavaManifestMFDialect.h Sun Oct 31 17:30:40 2021 +0100
@@ -0,0 +1,60 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "Dialect.h"
+#include "uri.h"
+
+namespace relpipe {
+namespace in {
+namespace ini {
+namespace lib {
+
+/**
+ * Dialect preset for Java MANIFEST.MF files.
+ *
+ * Uses „:“ as the key-value separator and „#“ as the comment separator, turns off sections, section tags
+ * and sub-keys, configures no quote characters and switches line continuation from the escaping mode to the leading-space mode.
+ */
+class JavaManifestMFDialect : public Dialect {
+public:
+
+	/**
+	 * Configures the given reader for the MANIFEST.MF dialect by setting its options.
+	 *
+	 * @param reader the INI reader whose options will be set
+	 */
+	void apply(INIReader& reader) override {
+		reader.setOption(option::TrimContinuingLines, "true");
+		reader.setOption(option::AllowSections, "false");
+		reader.setOption(option::AllowSectionTags, "false");
+		reader.setOption(option::AllowSubKeys, "false");
+		reader.setOption(option::CommentSeparators, "#");
+		reader.setOption(option::KeyValueSeparators, ":");
+		reader.setOption(option::Quotes, "");
+		// Line continuation: the escaping mode is switched off and the leading-space mode is switched on.
+		reader.setOption(option::AllowLineContinuationWithEscaping, "false");
+		reader.setOption(option::AllowLineContinuationWithSpace, "true");
+		// reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor
+		// NOTE(review): unescaping stays enabled although escape-based line continuation is disabled above;
+		// INIReader documents that the unescaping processors may then complain about a trailing „\“ – confirm this is intended.
+		reader.setOption(unescaping::JavaProperties, "true");
+	}
+
+};
+
+}
+}
+}
+}
--- a/src/lib/JavaPropertiesDialect.h Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/JavaPropertiesDialect.h Sun Oct 31 17:30:40 2021 +0100
@@ -34,9 +34,11 @@
reader.setOption(option::AllowSections, "false");
reader.setOption(option::AllowSectionTags, "false");
reader.setOption(option::AllowSubKeys, "false");
- reader.setOption(option::CommentSeparators, "#");
- reader.setOption(option::KeyValueSeparators, "=:");
+ reader.setOption(option::CommentSeparators, "#!");
+ reader.setOption(option::KeyValueSeparators, "=");
reader.setOption(option::Quotes, "");
+ reader.setOption(option::AllowLineContinuationWithEscaping, "true");
+ reader.setOption(option::AllowLineContinuationWithSpace, "false");
// reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor
reader.setOption(unescaping::JavaProperties, "true");
}
--- a/src/lib/UnescapingProcessor.h Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/UnescapingProcessor.h Sun Oct 31 17:30:40 2021 +0100
@@ -20,8 +20,6 @@
#include "INIReader.h"
-using namespace std;
-
namespace relpipe {
namespace in {
namespace ini {
@@ -52,6 +50,8 @@
virtual std::string unescape(const std::string& s, const TextType type) = 0;
+ virtual ~UnescapingProcessor() { // virtual, so that a derived processor can be destroyed correctly through a pointer to UnescapingProcessor
+ }
};
}
--- a/src/lib/uri.h Mon Nov 30 00:12:16 2020 +0100
+++ b/src/lib/uri.h Sun Oct 31 17:30:40 2021 +0100
@@ -25,6 +25,8 @@
/** general options of the INI parser */
namespace option {
+static const char* AllowLineContinuationWithEscaping = "allow-line-continuation-with-escaping";
+static const char* AllowLineContinuationWithSpace = "allow-line-continuation-with-space";
static const char* TrimContinuingLines = "trim-continuing-lines";
static const char* AllowSections = "allow-sections";
static const char* AllowSectionTags = "allow-section-tags";
@@ -45,6 +47,7 @@
/** not options but a values of the dialect option */
namespace dialect {
static const char* JavaProperties = "java-properties";
+static const char* JavaManifestMF = "java-manifest-mf";
}
/** options for configuring the stage where events from the INI parser are converted to SAX events or DOM building */