# HG changeset patch # User František Kučera # Date 1635697840 -3600 # Node ID c9a158da6c320ef97a3ab957af3c13288f5b9edf # Parent e72546725c777eec68f16f0c2f7e7dd953eb1c46 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini) diff -r e72546725c77 -r c9a158da6c32 bash-completion.sh --- a/bash-completion.sh Mon Nov 30 00:12:16 2020 +0100 +++ b/bash-completion.sh Sun Oct 31 17:30:40 2021 +0100 @@ -62,6 +62,8 @@ # TODO: introspection: after moving to alt2xml the available options and their values should be provided by the parser PARSER_OPTIONS=( + "allow-line-continuation-with-escaping" + "allow-line-continuation-with-space" "trim-continuing-lines" "allow-sections" "allow-section-tags" @@ -85,6 +87,7 @@ DIALECTS=( "java-properties" + "java-manifest-mf" ); TREE_STYLES=( @@ -111,6 +114,8 @@ elif [[ "$w3" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''") elif [[ "$w1" == "--parser-option" ]]; then COMPREPLY=($(compgen -W "${PARSER_OPTIONS[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "allow-line-continuation-with-escaping" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "allow-line-continuation-with-space" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w2" == "--parser-option" && "$w1" == "trim-continuing-lines" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w2" == "--parser-option" && "$w1" == "allow-sections" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w2" == "--parser-option" && "$w1" == "allow-section-tags" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) diff -r e72546725c77 -r c9a158da6c32 src/XMLDocumentConstructor.h --- a/src/XMLDocumentConstructor.h Mon Nov 30 00:12:16 2020 +0100 +++ b/src/XMLDocumentConstructor.h Sun Oct 31 17:30:40 2021 +0100 @@ -25,6 +25,7 @@ #include 
"lib/BackspaceUnescapingProcessor.h" #include "lib/JavaPropertiesUnescapingProcessor.h" #include "lib/JavaPropertiesDialect.h" +#include "lib/JavaManifestMFDialect.h" #include "lib/XMLNameCodec.h" using namespace relpipe::in::ini::lib; @@ -161,6 +162,7 @@ reader->addUnescapingProcessor(std::make_shared(), unescaping::JavaProperties, false); reader->addUnescapingProcessor(std::make_shared(), unescaping::Backspace, true); reader->addDialect(std::make_shared(), dialect::JavaProperties, false); + reader->addDialect(std::make_shared(), dialect::JavaManifestMF, false); handler = std::make_shared(parser); } diff -r e72546725c77 -r c9a158da6c32 src/lib/BasicUnescapingProcessor.h --- a/src/lib/BasicUnescapingProcessor.h Mon Nov 30 00:12:16 2020 +0100 +++ b/src/lib/BasicUnescapingProcessor.h Sun Oct 31 17:30:40 2021 +0100 @@ -38,11 +38,12 @@ if (i + 1 < length && ch == ESC) { ch = s[i + 1]; if (ch == 'n') put(result, '\n', i); - else if (ch == 'r') put(result, '\r', i); + else if (ch == 'r') put(result, '\r', i); // TODO: should be 0x0d, not 0x0a like \n else if (ch == 't') put(result, '\t', i); else if (ch == 's') put(result, ' ', i); // TODO: Reconsider what is „basic“ escaping and should be supported. else if (ch == '"') put(result, ch, i); // The delimiters (\n,]",') are already unescaped during the first stage in the INIReader while parsing (the delimiter relevant to given environment is unescaped, e.g. \" in "quoted" value). else if (ch == '\'') put(result, ch, i); // So it does not necessary to do it here. But someone might write a="xxx\'zzz" however it is superfluous because a="xxx'zzz" will also work. 
+ else if (ch == '[') put(result, ch, i); else if (ch == ']') put(result, ch, i); else if (ch == ':') put(result, ch, i); else if (ch == ';') put(result, ch, i); diff -r e72546725c77 -r c9a158da6c32 src/lib/Dialect.h --- a/src/lib/Dialect.h Mon Nov 30 00:12:16 2020 +0100 +++ b/src/lib/Dialect.h Sun Oct 31 17:30:40 2021 +0100 @@ -18,8 +18,6 @@ #include "INIReader.h" -using namespace std; - namespace relpipe { namespace in { namespace ini { @@ -37,6 +35,9 @@ */ virtual void apply(INIReader& reader) = 0; + virtual ~Dialect() { + } + }; } diff -r e72546725c77 -r c9a158da6c32 src/lib/INIReader.cpp --- a/src/lib/INIReader.cpp Mon Nov 30 00:12:16 2020 +0100 +++ b/src/lib/INIReader.cpp Sun Oct 31 17:30:40 2021 +0100 @@ -58,6 +58,20 @@ std::vector dialects; + /** + * If there is a „\“ backslash at the end of a physical line, the logical line continues on the next physical line. + * + * Disabling this option makes sense only if we also disable the unescaping processors (unescape-basic, unescape-backspace). + * Otherwise they will complain about „Missing escape sequence“ because they got „\“ at the end of the value. + */ + bool allowLineContinuationsWithEscaping = true; + + /** + * If a line starts with a space, it is a continuation of the previous line. + * This rule conflicts with the default behaviour of ignoring such insignificant whitespace and is quite specific to the Java MANIFEST.MF dialect. + */ + bool allowLineContinuationsWithSpace = false; + /** * By default, we ignore all leading whitespace on continuing lines. * If there should be some spaces or tabs, they should be placed on the previous line before the „\“. 
@@ -188,8 +202,19 @@ std::string readUntil(const std::string& until, bool* found = nullptr) { std::stringstream result; - for (char ch = peek(); input.good() && !oneOf(ch, until); ch = peek()) { - if (ch == '\\') { + for (char ch = peek(); input.good(); ch = peek()) { + if (allowLineContinuationsWithSpace && ch == '\n') { + get(); + ch = peek(); + if (ch == ' ') get(); + else if (ch == std::istream::traits_type::eof()) break; + else { + if (found) *found = true; + return result.str(); + } + } else if (oneOf(ch, until)) { + break; + } else if (allowLineContinuationsWithEscaping && ch == '\\') { get(); ch = get(); if (oneOf(ch, until) && ch == '\n') processContinuingLine(result); @@ -309,7 +334,9 @@ } void setOption(const std::string& uri, const std::string& value) override { - if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean + if (uri == option::AllowLineContinuationWithEscaping) allowLineContinuationsWithEscaping = parseBoolean(value); + else if (uri == option::AllowLineContinuationWithSpace) allowLineContinuationsWithSpace = parseBoolean(value); + else if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); else if (uri == option::AllowSections) allowSections = parseBoolean(value); else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value); else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value); diff -r e72546725c77 -r c9a158da6c32 src/lib/JavaManifestMFDialect.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/JavaManifestMFDialect.h Sun Oct 31 17:30:40 2021 +0100 @@ -0,0 +1,51 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of 
the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#include "Dialect.h" +#include "uri.h" + +using namespace std; + +namespace relpipe { +namespace in { +namespace ini { +namespace lib { + +class JavaManifestMFDialect : public Dialect { +public: + + void apply(INIReader& reader) override { + reader.setOption(option::TrimContinuingLines, "true"); + reader.setOption(option::AllowSections, "false"); + reader.setOption(option::AllowSectionTags, "false"); + reader.setOption(option::AllowSubKeys, "false"); + reader.setOption(option::CommentSeparators, "#"); + reader.setOption(option::KeyValueSeparators, ":"); + reader.setOption(option::Quotes, ""); + reader.setOption(option::AllowLineContinuationWithEscaping, "false"); + reader.setOption(option::AllowLineContinuationWithSpace, "true"); + // reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor + reader.setOption(unescaping::JavaProperties, "true"); + } + +}; + +} +} +} +} diff -r e72546725c77 -r c9a158da6c32 src/lib/JavaPropertiesDialect.h --- a/src/lib/JavaPropertiesDialect.h Mon Nov 30 00:12:16 2020 +0100 +++ b/src/lib/JavaPropertiesDialect.h Sun Oct 31 17:30:40 2021 +0100 @@ -34,9 +34,11 @@ reader.setOption(option::AllowSections, "false"); reader.setOption(option::AllowSectionTags, "false"); reader.setOption(option::AllowSubKeys, "false"); - reader.setOption(option::CommentSeparators, "#"); - reader.setOption(option::KeyValueSeparators, "=:"); + reader.setOption(option::CommentSeparators, "#!"); + reader.setOption(option::KeyValueSeparators, "="); 
reader.setOption(option::Quotes, ""); + reader.setOption(option::AllowLineContinuationWithEscaping, "true"); + reader.setOption(option::AllowLineContinuationWithSpace, "false"); // reader.setOption(unescaping::Basic, "false"); // TODO: maybe disable basic escaping and do exactly what is specified in JavaPropertiesUnescapingProcessor reader.setOption(unescaping::JavaProperties, "true"); } diff -r e72546725c77 -r c9a158da6c32 src/lib/UnescapingProcessor.h --- a/src/lib/UnescapingProcessor.h Mon Nov 30 00:12:16 2020 +0100 +++ b/src/lib/UnescapingProcessor.h Sun Oct 31 17:30:40 2021 +0100 @@ -20,8 +20,6 @@ #include "INIReader.h" -using namespace std; - namespace relpipe { namespace in { namespace ini { @@ -52,6 +50,8 @@ virtual std::string unescape(const std::string& s, const TextType type) = 0; + virtual ~UnescapingProcessor() { + } }; } diff -r e72546725c77 -r c9a158da6c32 src/lib/uri.h --- a/src/lib/uri.h Mon Nov 30 00:12:16 2020 +0100 +++ b/src/lib/uri.h Sun Oct 31 17:30:40 2021 +0100 @@ -25,6 +25,8 @@ /** general options of the INI parser */ namespace option { +static const char* AllowLineContinuationWithEscaping = "allow-line-continuation-with-escaping"; +static const char* AllowLineContinuationWithSpace = "allow-line-continuation-with-space"; static const char* TrimContinuingLines = "trim-continuing-lines"; static const char* AllowSections = "allow-sections"; static const char* AllowSectionTags = "allow-section-tags"; @@ -45,6 +47,7 @@ /** not options but a values of the dialect option */ namespace dialect { static const char* JavaProperties = "java-properties"; +static const char* JavaManifestMF = "java-manifest-mf"; } /** options for configuring the stage where events from the INI parser are converted to SAX events or DOM building */