--- a/src/XMLDocumentConstructor.h Sun Nov 22 19:25:42 2020 +0100
+++ b/src/XMLDocumentConstructor.h Mon Nov 23 16:25:39 2020 +0100
@@ -67,15 +67,33 @@
entry->set_attribute("key", event.key);
entry->set_attribute("full-key", event.fullKey);
if (event.subKey.size()) entry->set_attribute("sub-key", event.subKey);
- if (event.comment.size()) currentSection->set_attribute("comment", event.comment);
- if (event.lineNumber >= 0) currentSection->set_attribute("line-number", std::to_string(event.lineNumber));
- if (event.eventNumber >= 0) currentSection->set_attribute("event-number", std::to_string(event.eventNumber));
+ if (event.comment.size()) entry->set_attribute("comment", event.comment);
+ if (event.lineNumber >= 0) entry->set_attribute("line-number", std::to_string(event.lineNumber));
+ if (event.eventNumber >= 0) entry->set_attribute("event-number", std::to_string(event.eventNumber));
entry->add_child_text(event.value);
};
+ void comment(const CommentEvent& event) override { // appends a <comment type="comment"> child element to currentSection
+ xmlpp::Element* const node = currentSection->add_child("comment");
+ node->set_attribute("type", "comment");
+ if (event.lineNumber >= 0) node->set_attribute("line-number", std::to_string(event.lineNumber)); // negative number: attribute is omitted
+ if (event.eventNumber >= 0) node->set_attribute("event-number", std::to_string(event.eventNumber));
+ node->add_child_text(event.comment); // the comment text becomes the element's text content
+ }
+
+ void whitespace(const WhitespaceEvent& event) override { // appends a <whitespace type="whitespace"> child element to currentSection
+ xmlpp::Element* const node = currentSection->add_child("whitespace");
+ node->set_attribute("type", "whitespace");
+ if (event.lineNumber >= 0) node->set_attribute("line-number", std::to_string(event.lineNumber)); // negative number: attribute is omitted
+ if (event.eventNumber >= 0) node->set_attribute("event-number", std::to_string(event.eventNumber));
+ node->add_child_text(event.whitespace); // the whitespace itself becomes the element's text content
+ }
+
};
// TODO: support also other styles/mappings e.g. <section/> and <entry/> with INI names only in the XML attributes (and thus without @type="section|entry")
+// or map INI comments and whitespace to native XML comments and text nodes (but there will be no metadata like line/event numbers)
+// TODO: optional namespaces (xmlns)
class XMLDocumentConstructor {
private:
--- a/src/lib/INIContentHandler.h Sun Nov 22 19:25:42 2020 +0100
+++ b/src/lib/INIContentHandler.h Mon Nov 23 16:25:39 2020 +0100
@@ -25,26 +25,39 @@
public:
int64_t eventNumber = -1;
int64_t lineNumber = -1;
- std::string comment;
};
class SectionStartEvent : public Event {
public:
+ std::string comment;
std::string name;
};
class EntryEvent : public Event {
public:
+ std::string comment;
std::string key;
std::string subKey;
std::string fullKey;
std::string value;
};
+ class CommentEvent : public Event { // event object passed to comment()
+ public:
+ std::string comment; // comment text; INIReader stores it without the leading ;/# marker
+ };
+
+ class WhitespaceEvent : public Event { // event object passed to whitespace()
+ public:
+ std::string whitespace; // INIReader stores the whole matched whitespace-only line here
+ };
+
virtual ~INIContentHandler() = default;
virtual void startDocument() = 0;
virtual void endDocument() = 0;
virtual void startSection(const SectionStartEvent& event) = 0;
virtual void endSection() = 0;
virtual void entry(const EntryEvent& event) = 0;
-};
\ No newline at end of file
+ virtual void comment(const CommentEvent& event) = 0;
+ virtual void whitespace(const WhitespaceEvent& event) = 0;
+};
--- a/src/lib/INIReader.cpp Sun Nov 22 19:25:42 2020 +0100
+++ b/src/lib/INIReader.cpp Mon Nov 23 16:25:39 2020 +0100
@@ -39,7 +39,7 @@
std::regex whitespacePattrern("\\s*");
std::regex commentPattrern("\\s*(;|#)\\s*(.*)");
- std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*");
+ std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*((;|#)\\s*(.*))?");
std::regex entryQuotesPattrern(/***/"\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*\"([^']+)\"\\s*((;|#)\\s*(.*))?");
std::regex entryApostrophesPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
std::regex entryPlainPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*(.*)");
@@ -55,9 +55,17 @@
lineNumber++;
if (std::regex_match(line, match, whitespacePattrern)) {
- // TODO: support also whitespace
+ INIContentHandler::WhitespaceEvent event;
+ event.lineNumber = lineNumber;
+ event.eventNumber = ++eventNumber;
+ event.whitespace = match[0];
+ for (INIContentHandler* handler : handlers) handler->whitespace(event);
} else if (std::regex_match(line, match, commentPattrern)) {
- // TODO: support also comments + emit also the comment style (;/#)
+ INIContentHandler::CommentEvent event;
+ event.lineNumber = lineNumber;
+ event.eventNumber = ++eventNumber;
+ event.comment = match[2];
+ for (INIContentHandler* handler : handlers) handler->comment(event);
} else if (std::regex_match(line, match, sectionPattrern)) {
if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
inSection = true;
@@ -65,7 +73,7 @@
event.lineNumber = lineNumber;
event.eventNumber = ++eventNumber;
event.name = match[1];
- // TODO: support also comments + emit also the comment style (;/#)
+ event.comment = match[4];
for (INIContentHandler* handler : handlers) handler->startSection(event);
} else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern) || std::regex_match(line, match, entryPlainPattrern)) {
INIContentHandler::EntryEvent event;
@@ -76,13 +84,13 @@
event.fullKey = match[1];
event.value = match[5];
if (match.size() == 9) event.comment = match[8];
- // TODO: emit also the quote style ('/"/) and surrounding whitespace
for (INIContentHandler* handler : handlers) handler->entry(event);
} else {
// TODO: warning, error, or support unknown content
}
- // TODO: probably switch to state-machine approach instead of regular expressions
+ // General features:
+ // TODO: probably switch to state-machine approach instead of regular expressions or use an existing library
// TODO: warning/error handler
// TODO: support also multiline content (\ + \n)
// TODO: support also quoted or multiline keys?
@@ -92,6 +100,12 @@
// TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections
// TODO: support also option for alternative key-value separator (: instead of =)
// TODO: support also other encodings (currently only UTF-8 is supported)
+
+ // Lossless conversions:
+ // TODO: emit also the quote style ('/"/)
+ // TODO: emit also the comment style (;/#) ?
+ // TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ?
+ // TODO: emit also the line-end type (LF/CRLF) ?
}
if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();