# HG changeset patch # User František Kučera # Date 1606145139 -3600 # Node ID 90f2b8ca32bfa651a52f486cf784d4deec4dce52 # Parent 45c06bdf90457cadfb2879ab39bccb6c65984532 improved support for comments and whitespace diff -r 45c06bdf9045 -r 90f2b8ca32bf src/XMLDocumentConstructor.h --- a/src/XMLDocumentConstructor.h Sun Nov 22 19:25:42 2020 +0100 +++ b/src/XMLDocumentConstructor.h Mon Nov 23 16:25:39 2020 +0100 @@ -67,15 +67,33 @@ entry->set_attribute("key", event.key); entry->set_attribute("full-key", event.fullKey); if (event.subKey.size()) entry->set_attribute("sub-key", event.subKey); - if (event.comment.size()) currentSection->set_attribute("comment", event.comment); - if (event.lineNumber >= 0) currentSection->set_attribute("line-number", std::to_string(event.lineNumber)); - if (event.eventNumber >= 0) currentSection->set_attribute("event-number", std::to_string(event.eventNumber)); + if (event.comment.size()) entry->set_attribute("comment", event.comment); + if (event.lineNumber >= 0) entry->set_attribute("line-number", std::to_string(event.lineNumber)); + if (event.eventNumber >= 0) entry->set_attribute("event-number", std::to_string(event.eventNumber)); entry->add_child_text(event.value); }; + void comment(const CommentEvent& event) override { + xmlpp::Element* comment = currentSection->add_child("comment"); + comment->set_attribute("type", "comment"); + if (event.lineNumber >= 0) comment->set_attribute("line-number", std::to_string(event.lineNumber)); + if (event.eventNumber >= 0) comment->set_attribute("event-number", std::to_string(event.eventNumber)); + comment->add_child_text(event.comment); + } + + void whitespace(const WhitespaceEvent& event) override { + xmlpp::Element* comment = currentSection->add_child("whitespace"); + comment->set_attribute("type", "whitespace"); + if (event.lineNumber >= 0) comment->set_attribute("line-number", std::to_string(event.lineNumber)); + if (event.eventNumber >= 0) comment->set_attribute("event-number", 
std::to_string(event.eventNumber)); + comment->add_child_text(event.whitespace); + } + }; // TODO: support also other styles/mappings e.g.
and with INI names only in the XML attributes (and thus without @type="section|entry") +// or map INI comments and whitespace to native XML comments and text nodes (but there will be no metadata like line/event numbers) +// TODO: optional namespaces (xmlns) class XMLDocumentConstructor { private: diff -r 45c06bdf9045 -r 90f2b8ca32bf src/lib/INIContentHandler.h --- a/src/lib/INIContentHandler.h Sun Nov 22 19:25:42 2020 +0100 +++ b/src/lib/INIContentHandler.h Mon Nov 23 16:25:39 2020 +0100 @@ -25,26 +25,39 @@ public: int64_t eventNumber = -1; int64_t lineNumber = -1; - std::string comment; }; class SectionStartEvent : public Event { public: + std::string comment; std::string name; }; class EntryEvent : public Event { public: + std::string comment; std::string key; std::string subKey; std::string fullKey; std::string value; }; + class CommentEvent : public Event { + public: + std::string comment; + }; + + class WhitespaceEvent : public Event { + public: + std::string whitespace; + }; + virtual ~INIContentHandler() = default; virtual void startDocument() = 0; virtual void endDocument() = 0; virtual void startSection(const SectionStartEvent& event) = 0; virtual void endSection() = 0; virtual void entry(const EntryEvent& event) = 0; -}; \ No newline at end of file + virtual void comment(const CommentEvent& event) = 0; + virtual void whitespace(const WhitespaceEvent& event) = 0; +}; diff -r 45c06bdf9045 -r 90f2b8ca32bf src/lib/INIReader.cpp --- a/src/lib/INIReader.cpp Sun Nov 22 19:25:42 2020 +0100 +++ b/src/lib/INIReader.cpp Mon Nov 23 16:25:39 2020 +0100 @@ -39,7 +39,7 @@ std::regex whitespacePattrern("\\s*"); std::regex commentPattrern("\\s*(;|#)\\s*(.*)"); - std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*"); + std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*((;|#)\\s*(.*))?"); std::regex entryQuotesPattrern(/***/"\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*\"([^']+)\"\\s*((;|#)\\s*(.*))?"); std::regex 
entryApostrophesPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?"); std::regex entryPlainPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*(.*)"); @@ -55,9 +55,17 @@ lineNumber++; if (std::regex_match(line, match, whitespacePattrern)) { - // TODO: support also whitespace + INIContentHandler::WhitespaceEvent event; + event.lineNumber = lineNumber; + event.eventNumber = ++eventNumber; + event.whitespace = match[0]; + for (INIContentHandler* handler : handlers) handler->whitespace(event); } else if (std::regex_match(line, match, commentPattrern)) { - // TODO: support also comments + emit also the comment style (;/#) + INIContentHandler::CommentEvent event; + event.lineNumber = lineNumber; + event.eventNumber = ++eventNumber; + event.comment = match[2]; + for (INIContentHandler* handler : handlers) handler->comment(event); } else if (std::regex_match(line, match, sectionPattrern)) { if (inSection) for (INIContentHandler* handler : handlers) handler->endSection(); inSection = true; @@ -65,7 +73,7 @@ event.lineNumber = lineNumber; event.eventNumber = ++eventNumber; event.name = match[1]; - // TODO: support also comments + emit also the comment style (;/#) + event.comment = match[4]; for (INIContentHandler* handler : handlers) handler->startSection(event); } else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern) || std::regex_match(line, match, entryPlainPattrern)) { INIContentHandler::EntryEvent event; @@ -76,13 +84,13 @@ event.fullKey = match[1]; event.value = match[5]; if (match.size() == 9) event.comment = match[8]; - // TODO: emit also the quote style ('/"/) and surrounding whitespace for (INIContentHandler* handler : handlers) handler->entry(event); } else { // TODO: warning, error, or support unknown content } - // TODO: probably switch to state-machine approach instead of regular expressions + // General features: + // TODO: probably switch 
to state-machine approach instead of regular expressions or use an existing library // TODO: warning/error handler // TODO: support also multiline content (\ + \n) // TODO: support also quoted or multiline keys? @@ -92,6 +100,12 @@ // TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections // TODO: support also option for alternative key-value separator (: instead of =) // TODO: support also other encodings (currently only UTF-8 is supported) + + // Lossless conversions: + // TODO: emit also the quote style ('/"/) + // TODO: emit also the comment style (;/#) ? + // TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ? + // TODO: emit also the line-end type (LF/CRLF) ? } if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();