--- a/bash-completion.sh Thu Nov 26 18:52:49 2020 +0100
+++ b/bash-completion.sh Fri Nov 27 16:29:12 2020 +0100
@@ -27,6 +27,21 @@
"false"
)
+ PARSER_OPTIONS=( # option names offered as the first argument after --parser-option
+ "trim-continuing-lines"
+ "allow-section-tags"
+ "allow-sub-keys"
+ "comment-separators"
+ "key-value-separators"
+ "quotes"
+ "dialect"
+ )
+
+ DIALECTS=( # values offered after "--parser-option dialect"
+ "default-ini"
+ "java-properties"
+ )
+
if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''")
elif [[ "$w1" == "--enable-sections" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
elif [[ "$w1" == "--enable-sub-keys" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
@@ -34,6 +49,14 @@
elif [[ "$w1" == "--enable-whitespace" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
elif [[ "$w1" == "--enable-line-numbers" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
elif [[ "$w1" == "--enable-event-numbers" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+ elif [[ "$w1" == "--parser-option" ]]; then COMPREPLY=($(compgen -W "${PARSER_OPTIONS[*]}" -- "$w0"))
+ elif [[ "$w2" == "--parser-option" && "$w1" == "trim-continuing-lines" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) # NOTE(review): $w2 is not defined in any visible hunk — confirm it is set to the word before $w1
+ elif [[ "$w2" == "--parser-option" && "$w1" == "allow-section-tags" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+ elif [[ "$w2" == "--parser-option" && "$w1" == "allow-sub-keys" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+ elif [[ "$w2" == "--parser-option" && "$w1" == "dialect" ]]; then COMPREPLY=($(compgen -W "${DIALECTS[*]}" -- "$w0"))
+ elif [[ "$w2" == "--parser-option" && "$w1" == "comment-separators" && "x$w0" == "x" ]]; then COMPREPLY=("'#;'") # suggested only while the current word is still empty
+ elif [[ "$w2" == "--parser-option" && "$w1" == "key-value-separators" && "x$w0" == "x" ]]; then COMPREPLY=("'=:'")
+ elif [[ "$w2" == "--parser-option" && "$w1" == "quotes" && "x$w0" == "x" ]]; then COMPREPLY=("\$'\"\\''") # inserts $'"\'' (ANSI-C quoting); a plain '…' word cannot contain an apostrophe, so '"\'' would be left unterminated
else
OPTIONS=(
"--relation"
@@ -43,6 +66,7 @@
"--enable-whitespace"
"--enable-line-numbers"
"--enable-event-numbers"
+ "--parser-option"
)
COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0"))
fi
--- a/src/CLIParser.h Thu Nov 26 18:52:49 2020 +0100
+++ b/src/CLIParser.h Fri Nov 27 16:29:12 2020 +0100
@@ -49,6 +49,7 @@
public:
static const relpipe::writer::string_t OPTION_RELATION;
+ static const relpipe::writer::string_t OPTION_PARSER_OPTION;
static const relpipe::writer::string_t OPTION_ENABLE_SECTIONS;
static const relpipe::writer::string_t OPTION_ENABLE_SUB_KEYS;
static const relpipe::writer::string_t OPTION_ENABLE_COMMENTS;
@@ -63,6 +64,7 @@
relpipe::writer::string_t option = readNext(arguments, i);
if (option == OPTION_RELATION) c.relation = readNext(arguments, i);
+ else if (option == OPTION_PARSER_OPTION) c.parserOptions.push_back({readNext(arguments, i), readNext(arguments, i)}); // first readNext() result becomes uri, second becomes value: initializer-clauses of a braced-init-list are evaluated left to right
else if (option == OPTION_ENABLE_SECTIONS) c.enableSections = parseBoolean(readNext(arguments, i));
else if (option == OPTION_ENABLE_SUB_KEYS) c.enableSubKeys = parseBoolean(readNext(arguments, i));
else if (option == OPTION_ENABLE_COMMENTS) c.enableComments = parseBoolean(readNext(arguments, i));
@@ -80,6 +82,7 @@
};
const relpipe::writer::string_t CLIParser::OPTION_RELATION = L"--relation";
+const relpipe::writer::string_t CLIParser::OPTION_PARSER_OPTION = L"--parser-option";
const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SECTIONS = L"--enable-sections";
const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SUB_KEYS = L"--enable-sub-keys";
const relpipe::writer::string_t CLIParser::OPTION_ENABLE_COMMENTS = L"--enable-comments";
--- a/src/Configuration.h Thu Nov 26 18:52:49 2020 +0100
+++ b/src/Configuration.h Fri Nov 27 16:29:12 2020 +0100
@@ -26,9 +26,19 @@
namespace in {
namespace ini {
+class ParserOptionRecipe { // one parser option (name + value) kept as text until it is handed to the INI reader
+public:
+ relpipe::writer::string_t uri; // option name, e.g. the first argument following --parser-option
+ relpipe::writer::string_t value; // option value as text; it is not validated in this class
+
+ ParserOptionRecipe(const relpipe::writer::string_t& uri, const relpipe::writer::string_t& value) : uri(uri), value(value) { // const& avoids a second copy of each string when called with lvalues
+ }
+};
+
class Configuration {
public:
relpipe::writer::string_t relation = L"ini";
+ std::vector<ParserOptionRecipe> parserOptions;
relpipe::writer::boolean_t enableLineNumbers = false;
relpipe::writer::boolean_t enableEventNumbers = false;
relpipe::writer::boolean_t enableSections = true;
--- a/src/INICommand.cpp Thu Nov 26 18:52:49 2020 +0100
+++ b/src/INICommand.cpp Fri Nov 27 16:29:12 2020 +0100
@@ -170,7 +170,7 @@
void INICommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
FlatINIContentHandler handler(writer, configuration);
std::shared_ptr<INIReader> reader(INIReader::create(input));
- // TODO: configure the INIReader (features/properties) according to our Configuration (sub-keys etc.)
+ for (const ParserOptionRecipe& option : configuration.parserOptions) reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value)); // applied in stored order; iterating by const reference avoids copying both strings of every recipe
BasicUnescapingINIContentHandler unescapingHandler(handler, false);
JavaPropertiesUnescapingINIContentHandler javaHandler(unescapingHandler, true);
reader->addHandler(&javaHandler);
--- a/src/INICommand.h Thu Nov 26 18:52:49 2020 +0100
+++ b/src/INICommand.h Fri Nov 27 16:29:12 2020 +0100
@@ -30,6 +30,8 @@
namespace ini {
class INICommand {
+private:
+ std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8; qualified with std:: so the header does not depend on a using-directive
public:
void process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration);
--- a/src/lib/INIReader.cpp Thu Nov 26 18:52:49 2020 +0100
+++ b/src/lib/INIReader.cpp Fri Nov 27 16:29:12 2020 +0100
@@ -33,8 +33,6 @@
std::vector<INIContentHandler*> handlers;
/**
- * This might be configurable.
- *
* By default, we ignore all leading whitespace on continuing lines.
* If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
* If a line break is desired, it should be written as \n (escaped) or the value should be quoted in " or '.
@@ -42,11 +40,9 @@
* Related specifications:
* - https://docs.oracle.com/javase/8/docs/api/index.html?java/util/Properties.html
*/
- bool consumeLeadingSpacesOnContinuingLines = true;
+ bool trimLeadingSpacesOnContinuingLines = true;
/**
- * This might be configurable.
- *
* KDE uses some weird INI dialect that allows [section][x] syntax where „x“ is kind of „tag“ that signalizes some properties of given section.
* Line „[section_1][$i]“ means that the „section_1“ is „locked“.
* We may emit this information somehow later, but for now, it is just ignored.
@@ -59,8 +55,6 @@
bool allowSectionTags = true;
/**
- * This might be configurable.
- *
* If whole key is „aaa[bbb]“ then „aaa“ is considered to be the key and „bbb“ the sub-key.
* No \[ escaping is currently supported, so the key might not contain the bracket character.
*
@@ -71,8 +65,6 @@
bool allowSubKeys = true;
/**
- * This might be configurable.
- *
* Classic INI uses „key=value“ syntax.
* But some other formats/dialects might use key:value.
*
@@ -83,8 +75,6 @@
std::string keyValueSeparators = "=";
/**
- * This might be configurable.
- *
* Classic INI uses „; comment“ syntax.
* But many existing files contain „# comment“ lines.
*
@@ -93,8 +83,6 @@
std::string commentSeparators = ";#";
/**
- * This might be configurable.
- *
* INI often support both "quotes" and 'apostrophes' styles.
* But some dialects may support only one of them or not support quoting at all.
*
@@ -146,7 +134,7 @@
}
void processContinuingLine(std::stringstream& result) {
- if (consumeLeadingSpacesOnContinuingLines) readSpacesAndTabs();
+ if (trimLeadingSpacesOnContinuingLines) readSpacesAndTabs();
else result.put('\n');
}
@@ -237,11 +225,49 @@
return std::regex_replace(s, std::regex("^\\s+|\\s+$"), "");
}
+ /**
+ * Converts the text of a boolean option to bool; only "true" and "false" are accepted (std::invalid_argument otherwise).
+ * TODO: use a common method
+ */
+ bool parseBoolean(const std::string& value) {
+ if (value == "true" || value == "false") return value == "true";
+ throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
+ }
+
+ /** Applies a named preset of parser options; throws std::invalid_argument for an unknown dialect name. */
+ void setDialect(const std::string& name) {
+ // Nothing is assigned for the default dialect (note: options changed earlier are not reverted by it).
+ if (name == "default-ini") return;
+ // Only one other dialect is known here; every other name is refused.
+ if (name != "java-properties") throw std::invalid_argument(std::string("Unsupported INI dialect: ") + name);
+ // Preset for the java-properties dialect:
+ quotes = "";
+ keyValueSeparators = "=:";
+ commentSeparators = "#";
+ allowSubKeys = false;
+ allowSectionTags = false;
+ trimLeadingSpacesOnContinuingLines = true;
+ // TODO: allowSections = false;
+ // TODO: enable unicode unescaping
+ }
+
public:
INIReaderImpl(std::istream& input) : input(input) {
}
+ void setOption(const std::string& uri, const std::string& value) override {
+ if (uri == "dialect") setDialect(value); // a preset that assigns several of the options below at once
+ else if (uri == "quotes") quotes = value; // textual options are stored verbatim
+ else if (uri == "comment-separators") commentSeparators = value;
+ else if (uri == "key-value-separators") keyValueSeparators = value;
+ else if (uri == "allow-sub-keys") allowSubKeys = parseBoolean(value); // boolean options go through parseBoolean()
+ else if (uri == "allow-section-tags") allowSectionTags = parseBoolean(value);
+ // TODO: else if (uri == "allow-sections") allowSections = parseBoolean(value);
+ else if (uri == "trim-continuing-lines") trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
+ else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
+ }
+
void addHandler(INIContentHandler* handler) override {
handlers.push_back(handler);
}
--- a/src/lib/INIReader.h Thu Nov 26 18:52:49 2020 +0100
+++ b/src/lib/INIReader.h Fri Nov 27 16:29:12 2020 +0100
@@ -32,6 +32,18 @@
class INIReader {
public:
virtual ~INIReader() = default;
+ /**
+ * Sets one parser option given by its name (uri) and its textual value. TODO: after moving to alt2xml:
+ * - options will be identified by globally unique URIs/IRIs
+ * - parsers will provide a catalog of supported options (names, enum values, documentation)
+ * - options will serve as both XML parser features and properties and will be mapped to them
+ */
+ virtual void setOption(const std::string& uri, const std::string& value) = 0;
+ /**
+ * TODO: after moving to alt2xml:
+ * - this will be a generic handler for SAX events
+ * - but both sides will know the schema (allowed elements and attributes for INI events)
+ */
virtual void addHandler(INIContentHandler* handler) = 0;
virtual void process() = 0;
static INIReader* create(std::istream& input);