# HG changeset patch # User František Kučera # Date 1606490952 -3600 # Node ID b497140b0b633aabd4b55fa073f8634c6d16fcd1 # Parent 29d673a54ecf5ba7506ed9b4209fc313f1cd231a enable configuring the parser from CLI: --parser-option diff -r 29d673a54ecf -r b497140b0b63 bash-completion.sh --- a/bash-completion.sh Thu Nov 26 18:52:49 2020 +0100 +++ b/bash-completion.sh Fri Nov 27 16:29:12 2020 +0100 @@ -27,6 +27,21 @@ "false" ) + PARSER_OPTIONS=( + "trim-continuing-lines" + "allow-section-tags" + "allow-sub-keys" + "comment-separators" + "key-value-separators" + "quotes" + "dialect" + ); + + DIALECTS=( + "default-ini" + "java-properties" + ); + if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''") elif [[ "$w1" == "--enable-sections" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w1" == "--enable-sub-keys" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) @@ -34,6 +49,14 @@ elif [[ "$w1" == "--enable-whitespace" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w1" == "--enable-line-numbers" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w1" == "--enable-event-numbers" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w1" == "--parser-option" ]]; then COMPREPLY=($(compgen -W "${PARSER_OPTIONS[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "trim-continuing-lines" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "allow-section-tags" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "allow-sub-keys" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "dialect" ]]; then COMPREPLY=($(compgen -W "${DIALECTS[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "comment-separators" && "x$w0" == "x" ]]; then 
COMPREPLY=("'#;'") + elif [[ "$w2" == "--parser-option" && "$w1" == "key-value-separators" && "x$w0" == "x" ]]; then COMPREPLY=("'=:'") + elif [[ "$w2" == "--parser-option" && "$w1" == "quotes" && "x$w0" == "x" ]]; then COMPREPLY=("'\"\\''") else OPTIONS=( "--relation" @@ -43,6 +66,7 @@ "--enable-whitespace" "--enable-line-numbers" "--enable-event-numbers" + "--parser-option" ) COMPREPLY=($(compgen -W "${OPTIONS[*]}" -- "$w0")) fi diff -r 29d673a54ecf -r b497140b0b63 src/CLIParser.h --- a/src/CLIParser.h Thu Nov 26 18:52:49 2020 +0100 +++ b/src/CLIParser.h Fri Nov 27 16:29:12 2020 +0100 @@ -49,6 +49,7 @@ public: static const relpipe::writer::string_t OPTION_RELATION; + static const relpipe::writer::string_t OPTION_PARSER_OPTION; static const relpipe::writer::string_t OPTION_ENABLE_SECTIONS; static const relpipe::writer::string_t OPTION_ENABLE_SUB_KEYS; static const relpipe::writer::string_t OPTION_ENABLE_COMMENTS; @@ -63,6 +64,7 @@ relpipe::writer::string_t option = readNext(arguments, i); if (option == OPTION_RELATION) c.relation = readNext(arguments, i); + else if (option == OPTION_PARSER_OPTION) c.parserOptions.push_back({readNext(arguments, i), readNext(arguments, i)}); else if (option == OPTION_ENABLE_SECTIONS) c.enableSections = parseBoolean(readNext(arguments, i)); else if (option == OPTION_ENABLE_SUB_KEYS) c.enableSubKeys = parseBoolean(readNext(arguments, i)); else if (option == OPTION_ENABLE_COMMENTS) c.enableComments = parseBoolean(readNext(arguments, i)); @@ -80,6 +82,7 @@ }; const relpipe::writer::string_t CLIParser::OPTION_RELATION = L"--relation"; +const relpipe::writer::string_t CLIParser::OPTION_PARSER_OPTION = L"--parser-option"; const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SECTIONS = L"--enable-sections"; const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SUB_KEYS = L"--enable-sub-keys"; const relpipe::writer::string_t CLIParser::OPTION_ENABLE_COMMENTS = L"--enable-comments"; diff -r 29d673a54ecf -r b497140b0b63 
src/Configuration.h --- a/src/Configuration.h Thu Nov 26 18:52:49 2020 +0100 +++ b/src/Configuration.h Fri Nov 27 16:29:12 2020 +0100 @@ -26,9 +26,19 @@ namespace in { namespace ini { +class ParserOptionRecipe { +public: + relpipe::writer::string_t uri; + relpipe::writer::string_t value; + + ParserOptionRecipe(relpipe::writer::string_t uri, relpipe::writer::string_t value) : uri(uri), value(value) { + } +}; + class Configuration { public: relpipe::writer::string_t relation = L"ini"; + std::vector<ParserOptionRecipe> parserOptions; relpipe::writer::boolean_t enableLineNumbers = false; relpipe::writer::boolean_t enableEventNumbers = false; relpipe::writer::boolean_t enableSections = true; diff -r 29d673a54ecf -r b497140b0b63 src/INICommand.cpp --- a/src/INICommand.cpp Thu Nov 26 18:52:49 2020 +0100 +++ b/src/INICommand.cpp Fri Nov 27 16:29:12 2020 +0100 @@ -170,7 +170,7 @@ void INICommand::process(std::istream& input, std::shared_ptr writer, Configuration& configuration) { FlatINIContentHandler handler(writer, configuration); std::shared_ptr<INIReader> reader(INIReader::create(input)); - // TODO: configure the INIReader (features/properties) according to our Configuration (sub-keys etc.) 
+ for (ParserOptionRecipe option : configuration.parserOptions) reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value)); BasicUnescapingINIContentHandler unescapingHandler(handler, false); JavaPropertiesUnescapingINIContentHandler javaHandler(unescapingHandler, true); reader->addHandler(&javaHandler); diff -r 29d673a54ecf -r b497140b0b63 src/INICommand.h --- a/src/INICommand.h Thu Nov 26 18:52:49 2020 +0100 +++ b/src/INICommand.h Fri Nov 27 16:29:12 2020 +0100 @@ -30,6 +30,8 @@ namespace ini { class INICommand { +private: + wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8 public: void process(std::istream& input, std::shared_ptr writer, Configuration& configuration); diff -r 29d673a54ecf -r b497140b0b63 src/lib/INIReader.cpp --- a/src/lib/INIReader.cpp Thu Nov 26 18:52:49 2020 +0100 +++ b/src/lib/INIReader.cpp Fri Nov 27 16:29:12 2020 +0100 @@ -33,8 +33,6 @@ std::vector<INIContentHandler*> handlers; /** - * This might be configurable. - * * By default, we ignore all leading whitespace on continuing lines. * If there should be some spaces or tabs, they should be placed on the previous line before the „\“. * If a line break is desired, it should be written as \n (escaped) or the value should be quoted in " or '. @@ -42,11 +40,9 @@ * Related specifications: * - https://docs.oracle.com/javase/8/docs/api/index.html?java/util/Properties.html */ - bool consumeLeadingSpacesOnContinuingLines = true; + bool trimLeadingSpacesOnContinuingLines = true; /** - * This might be configurable. - * * KDE uses some weird INI dialect that allows [section][x] syntax where „x“ is kind of „tag“ that signalizes some properties of given section. * Line „[section_1][$i]“ means that the „section_1“ is „locked“. * We may emit this information somehow later, but for now, it is just ignored. @@ -59,8 +55,6 @@ bool allowSectionTags = true; /** - * This might be configurable. 
- * * If whole key is „aaa[bbb]“ then „aaa“ is considered to be the key and „bbb“ the sub-key. * No \[ escaping is currently supported, so the key might not contain the bracket character. * @@ -71,8 +65,6 @@ bool allowSubKeys = true; /** - * This might be configurable. - * * Classic INI uses „key=value“ syntax. * But some other formats/dialects might use key:value. * @@ -83,8 +75,6 @@ std::string keyValueSeparators = "="; /** - * This might be configurable. - * * Classic INI uses „; comment“ syntax. * But many existing files contain „# comment“ lines. * @@ -93,8 +83,6 @@ std::string commentSeparators = ";#"; /** - * This might be configurable. - * * INI often support both "quotes" and 'apostrophes' styles. * But some dialects may support only one of them or not support quoting at all. * @@ -146,7 +134,7 @@ } void processContinuingLine(std::stringstream& result) { - if (consumeLeadingSpacesOnContinuingLines) readSpacesAndTabs(); + if (trimLeadingSpacesOnContinuingLines) readSpacesAndTabs(); else result.put('\n'); } @@ -237,11 +225,49 @@ return std::regex_replace(s, std::regex("^\\s+|\\s+$"), ""); } + /** + * TODO: use a common method + */ + bool parseBoolean(const std::string& value) { + if (value == "true") return true; + else if (value == "false") return false; + else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)"); + } + + void setDialect(const std::string& name) { + if (name == "default-ini") { + // already set + } else if (name == "java-properties") { + trimLeadingSpacesOnContinuingLines = true; + allowSectionTags = false; + allowSubKeys = false; + commentSeparators = "#"; + keyValueSeparators = "=:"; + quotes = ""; + // TODO: allowSections = false; + // TODO: enable unicode unescaping + } else { + throw std::invalid_argument(std::string("Unsupported INI dialect: ") + name); + } + } + public: INIReaderImpl(std::istream& input) : input(input) { } + void setOption(const std::string& uri, const 
std::string& value) override { + if (uri == "trim-continuing-lines") trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean + // TODO: else if (uri == "allow-sections") allowSections = parseBoolean(value); + else if (uri == "allow-section-tags") allowSectionTags = parseBoolean(value); + else if (uri == "allow-sub-keys") allowSubKeys = parseBoolean(value); + else if (uri == "comment-separators") commentSeparators = value; + else if (uri == "key-value-separators") keyValueSeparators = value; + else if (uri == "quotes") quotes = value; + else if (uri == "dialect") setDialect(value); + else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“"); + } + void addHandler(INIContentHandler* handler) override { handlers.push_back(handler); } diff -r 29d673a54ecf -r b497140b0b63 src/lib/INIReader.h --- a/src/lib/INIReader.h Thu Nov 26 18:52:49 2020 +0100 +++ b/src/lib/INIReader.h Fri Nov 27 16:29:12 2020 +0100 @@ -32,6 +32,18 @@ class INIReader { public: virtual ~INIReader() = default; + /** + * TODO: after moving to alt2xml: + * - each option will be identified by a globally unique URI/IRI + * - parsers will provide a catalog of supported options (names, enum values, documentation) + * - options serve as both XML parser features and properties and are mapped to them + */ + virtual void setOption(const std::string& uri, const std::string& value) = 0; + /** + * TODO: after moving to alt2xml: + * - this will be a generic handler for SAX events + * - but both sides will know the schema (allowed elements and attributes for INI events) + */ virtual void addHandler(INIContentHandler* handler) = 0; virtual void process() = 0; static INIReader* create(std::istream& input);