# HG changeset patch # User František Kučera # Date 1606145157 -3600 # Node ID fb717cfbfea1e60861ebec7c93b4461ef3e2741a # Parent d70ea23682aabbed74d519a8ebc1bdbf38318069 improved support for comments and whitespace diff -r d70ea23682aa -r fb717cfbfea1 bash-completion.sh --- a/bash-completion.sh Sun Nov 22 19:22:25 2020 +0100 +++ b/bash-completion.sh Mon Nov 23 16:25:57 2020 +0100 @@ -31,6 +31,7 @@ elif [[ "$w1" == "--enable-sections" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w1" == "--enable-sub-keys" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w1" == "--enable-comments" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w1" == "--enable-whitespace" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w1" == "--enable-line-numbers" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) elif [[ "$w1" == "--enable-event-numbers" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) else @@ -39,6 +40,7 @@ "--enable-sections" "--enable-sub-keys" "--enable-comments" + "--enable-whitespace" "--enable-line-numbers" "--enable-event-numbers" ) diff -r d70ea23682aa -r fb717cfbfea1 src/CLIParser.h --- a/src/CLIParser.h Sun Nov 22 19:22:25 2020 +0100 +++ b/src/CLIParser.h Mon Nov 23 16:25:57 2020 +0100 @@ -52,6 +52,7 @@ static const relpipe::writer::string_t OPTION_ENABLE_SECTIONS; static const relpipe::writer::string_t OPTION_ENABLE_SUB_KEYS; static const relpipe::writer::string_t OPTION_ENABLE_COMMENTS; + static const relpipe::writer::string_t OPTION_ENABLE_WHITESPACE; static const relpipe::writer::string_t OPTION_ENABLE_LINE_NUMBERS; static const relpipe::writer::string_t OPTION_ENABLE_EVENT_NUMBERS; @@ -65,6 +66,7 @@ else if (option == OPTION_ENABLE_SECTIONS) c.enableSections = parseBoolean(readNext(arguments, i)); else if (option == OPTION_ENABLE_SUB_KEYS) c.enableSubKeys = parseBoolean(readNext(arguments, i)); else if (option == 
OPTION_ENABLE_COMMENTS) c.enableComments = parseBoolean(readNext(arguments, i)); + else if (option == OPTION_ENABLE_WHITESPACE) c.enableWhitespace = parseBoolean(readNext(arguments, i)); else if (option == OPTION_ENABLE_LINE_NUMBERS) c.enableLineNumbers = parseBoolean(readNext(arguments, i)); else if (option == OPTION_ENABLE_EVENT_NUMBERS) c.enableEventNumbers = parseBoolean(readNext(arguments, i)); else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS); @@ -81,6 +83,7 @@ const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SECTIONS = L"--enable-sections"; const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SUB_KEYS = L"--enable-sub-keys"; const relpipe::writer::string_t CLIParser::OPTION_ENABLE_COMMENTS = L"--enable-comments"; +const relpipe::writer::string_t CLIParser::OPTION_ENABLE_WHITESPACE = L"--enable-whitespace"; const relpipe::writer::string_t CLIParser::OPTION_ENABLE_LINE_NUMBERS = L"--enable-line-numbers"; const relpipe::writer::string_t CLIParser::OPTION_ENABLE_EVENT_NUMBERS = L"--enable-event-numbers"; diff -r d70ea23682aa -r fb717cfbfea1 src/Configuration.h --- a/src/Configuration.h Sun Nov 22 19:22:25 2020 +0100 +++ b/src/Configuration.h Mon Nov 23 16:25:57 2020 +0100 @@ -34,6 +34,7 @@ relpipe::writer::boolean_t enableSections = true; relpipe::writer::boolean_t enableSubKeys = false; relpipe::writer::boolean_t enableComments = false; + relpipe::writer::boolean_t enableWhitespace = false; virtual ~Configuration() { } diff -r d70ea23682aa -r fb717cfbfea1 src/INICommand.cpp --- a/src/INICommand.cpp Sun Nov 22 19:22:25 2020 +0100 +++ b/src/INICommand.cpp Mon Nov 23 16:25:57 2020 +0100 @@ -73,6 +73,7 @@ if (configuration.enableSubKeys) metadata.push_back({L"sub_key", TypeId::STRING}); metadata.push_back({L"value", TypeId::STRING}); if (configuration.enableComments) metadata.push_back({L"comment", TypeId::STRING}); + if (configuration.enableWhitespace) 
metadata.push_back({L"whitespace", TypeId::STRING}); writer->startRelation(configuration.relation, metadata, true); }; @@ -102,9 +103,47 @@ writer->writeAttribute(convertor.from_bytes(event.value)); if (configuration.enableComments) writer->writeAttribute(convertor.from_bytes(event.comment)); + if (configuration.enableWhitespace) writer->writeAttribute(L""); }; - // TODO: handle also comments and whitespace (to allow lossless transformation from INI and back to INI) + void comment(const CommentEvent& event) override { + if (configuration.enableComments) { + if (configuration.enableLineNumbers) writer->writeAttribute(&event.lineNumber, typeid (event.lineNumber)); + if (configuration.enableEventNumbers) writer->writeAttribute(&event.eventNumber, typeid (event.eventNumber)); + + std::string section = getCurrentSectionFullName(); + std::string key; + + if (configuration.enableSections) writer->writeAttribute(convertor.from_bytes(section)); + else if (section.size()) key = section + "/"; + writer->writeAttribute(convertor.from_bytes(key)); + + if (configuration.enableSubKeys) writer->writeAttribute(L""); + writer->writeAttribute(L""); // value + writer->writeAttribute(convertor.from_bytes(event.comment)); + if (configuration.enableWhitespace) writer->writeAttribute(L""); + } + } + + void whitespace(const WhitespaceEvent& event) override { + if (configuration.enableWhitespace) { + if (configuration.enableLineNumbers) writer->writeAttribute(&event.lineNumber, typeid (event.lineNumber)); + if (configuration.enableEventNumbers) writer->writeAttribute(&event.eventNumber, typeid (event.eventNumber)); + + std::string section = getCurrentSectionFullName(); + std::string key; + + if (configuration.enableSections) writer->writeAttribute(convertor.from_bytes(section)); + else if (section.size()) key = section + "/"; + writer->writeAttribute(convertor.from_bytes(key)); + + if (configuration.enableSubKeys) writer->writeAttribute(L""); + writer->writeAttribute(L""); // value + if 
(configuration.enableComments) writer->writeAttribute(L""); + writer->writeAttribute(convertor.from_bytes(event.whitespace)); + } + } + // TODO: unify methods, DRY }; diff -r d70ea23682aa -r fb717cfbfea1 src/lib/INIContentHandler.h --- a/src/lib/INIContentHandler.h Sun Nov 22 19:22:25 2020 +0100 +++ b/src/lib/INIContentHandler.h Mon Nov 23 16:25:57 2020 +0100 @@ -25,26 +25,39 @@ public: int64_t eventNumber = -1; int64_t lineNumber = -1; - std::string comment; }; class SectionStartEvent : public Event { public: + std::string comment; std::string name; }; class EntryEvent : public Event { public: + std::string comment; std::string key; std::string subKey; std::string fullKey; std::string value; }; + class CommentEvent : public Event { + public: + std::string comment; + }; + + class WhitespaceEvent : public Event { + public: + std::string whitespace; + }; + virtual ~INIContentHandler() = default; virtual void startDocument() = 0; virtual void endDocument() = 0; virtual void startSection(const SectionStartEvent& event) = 0; virtual void endSection() = 0; virtual void entry(const EntryEvent& event) = 0; -}; \ No newline at end of file + virtual void comment(const CommentEvent& event) = 0; + virtual void whitespace(const WhitespaceEvent& event) = 0; +}; diff -r d70ea23682aa -r fb717cfbfea1 src/lib/INIReader.cpp --- a/src/lib/INIReader.cpp Sun Nov 22 19:22:25 2020 +0100 +++ b/src/lib/INIReader.cpp Mon Nov 23 16:25:57 2020 +0100 @@ -39,7 +39,7 @@ std::regex whitespacePattrern("\\s*"); std::regex commentPattrern("\\s*(;|#)\\s*(.*)"); - std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*"); + std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*((;|#)\\s*(.*))?"); std::regex entryQuotesPattrern(/***/"\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*\"([^']+)\"\\s*((;|#)\\s*(.*))?"); std::regex entryApostrophesPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?"); std::regex 
entryPlainPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*(.*)"); @@ -55,9 +55,17 @@ lineNumber++; if (std::regex_match(line, match, whitespacePattrern)) { - // TODO: support also whitespace + INIContentHandler::WhitespaceEvent event; + event.lineNumber = lineNumber; + event.eventNumber = ++eventNumber; + event.whitespace = match[0]; + for (INIContentHandler* handler : handlers) handler->whitespace(event); } else if (std::regex_match(line, match, commentPattrern)) { - // TODO: support also comments + emit also the comment style (;/#) + INIContentHandler::CommentEvent event; + event.lineNumber = lineNumber; + event.eventNumber = ++eventNumber; + event.comment = match[2]; + for (INIContentHandler* handler : handlers) handler->comment(event); } else if (std::regex_match(line, match, sectionPattrern)) { if (inSection) for (INIContentHandler* handler : handlers) handler->endSection(); inSection = true; @@ -65,7 +73,7 @@ event.lineNumber = lineNumber; event.eventNumber = ++eventNumber; event.name = match[1]; - // TODO: support also comments + emit also the comment style (;/#) + event.comment = match[4]; for (INIContentHandler* handler : handlers) handler->startSection(event); } else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern) || std::regex_match(line, match, entryPlainPattrern)) { INIContentHandler::EntryEvent event; @@ -76,13 +84,13 @@ event.fullKey = match[1]; event.value = match[5]; if (match.size() == 9) event.comment = match[8]; - // TODO: emit also the quote style ('/"/) and surrounding whitespace for (INIContentHandler* handler : handlers) handler->entry(event); } else { // TODO: warning, error, or support unknown content } - // TODO: probably switch to state-machine approach instead of regular expressions + // General features: + // TODO: probably switch to state-machine approach instead of regular expressions or use an existing library // TODO: warning/error handler // TODO: 
support also multiline content (\ + \n) // TODO: support also quoted or multiline keys? @@ -92,6 +100,12 @@ // TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections // TODO: support also option for alternative key-value separator (: instead of =) // TODO: support also other encodings (currently only UTF-8 is supported) + + // Lossless conversions: + // TODO: emit also the quote style ('/"/) + // TODO: emit also the comment style (;/#) ? + // TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ? + // TODO: emit also the line-end type (LF/CRLF) ? } if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();