improved support for comments and whitespace v_0
author František Kučera <franta-hg@frantovo.cz>
Mon, 23 Nov 2020 16:25:39 +0100
branch v_0
changeset 19 90f2b8ca32bf
parent 18 45c06bdf9045
child 20 fc8f9aab211d
improved support for comments and whitespace
src/XMLDocumentConstructor.h
src/lib/INIContentHandler.h
src/lib/INIReader.cpp
--- a/src/XMLDocumentConstructor.h	Sun Nov 22 19:25:42 2020 +0100
+++ b/src/XMLDocumentConstructor.h	Mon Nov 23 16:25:39 2020 +0100
@@ -67,15 +67,33 @@
 		entry->set_attribute("key", event.key);
 		entry->set_attribute("full-key", event.fullKey);
 		if (event.subKey.size()) entry->set_attribute("sub-key", event.subKey);
-		if (event.comment.size()) currentSection->set_attribute("comment", event.comment);
-		if (event.lineNumber >= 0) currentSection->set_attribute("line-number", std::to_string(event.lineNumber));
-		if (event.eventNumber >= 0) currentSection->set_attribute("event-number", std::to_string(event.eventNumber));
+		if (event.comment.size()) entry->set_attribute("comment", event.comment);
+		if (event.lineNumber >= 0) entry->set_attribute("line-number", std::to_string(event.lineNumber));
+		if (event.eventNumber >= 0) entry->set_attribute("event-number", std::to_string(event.eventNumber));
 		entry->add_child_text(event.value);
 	};
 
+	void comment(const CommentEvent& event) override { // Records an INI comment as a <comment type="comment"> child element of currentSection.
+		xmlpp::Element* comment = currentSection->add_child("comment");
+		comment->set_attribute("type", "comment");
+		if (event.lineNumber >= 0) comment->set_attribute("line-number", std::to_string(event.lineNumber)); // negative (the Event default is -1) means "not set": attribute omitted
+		if (event.eventNumber >= 0) comment->set_attribute("event-number", std::to_string(event.eventNumber)); // same rule as for line-number
+		comment->add_child_text(event.comment); // the comment text becomes the text content of the element
+	}
+
+	void whitespace(const WhitespaceEvent& event) override { // Records a whitespace event as a <whitespace type="whitespace"> child element of currentSection.
+		xmlpp::Element* comment = currentSection->add_child("whitespace"); // NOTE(review): the local is named `comment` but points to the <whitespace> element
+		comment->set_attribute("type", "whitespace");
+		if (event.lineNumber >= 0) comment->set_attribute("line-number", std::to_string(event.lineNumber)); // negative (the Event default is -1) means "not set": attribute omitted
+		if (event.eventNumber >= 0) comment->set_attribute("event-number", std::to_string(event.eventNumber)); // same rule as for line-number
+		comment->add_child_text(event.whitespace); // the whitespace itself becomes the text content of the element
+	}
+
 };
 
 // TODO: support also other styles/mappings e.g. <section/> and <entry/> with INI names only in the XML attributes (and thus without @type="section|entry")
+// or map INI comments and whitespace to native XML comments and text nodes (but there will be no metadata like line/event numbers)
+// TODO: optional namespaces (xmlns)
 
 class XMLDocumentConstructor {
 private:
--- a/src/lib/INIContentHandler.h	Sun Nov 22 19:25:42 2020 +0100
+++ b/src/lib/INIContentHandler.h	Mon Nov 23 16:25:39 2020 +0100
@@ -25,26 +25,39 @@
 	public:
 		int64_t eventNumber = -1;
 		int64_t lineNumber = -1;
-		std::string comment;
 	};
 
 	class SectionStartEvent : public Event {
 	public:
+		std::string comment; // text after a ;/# marker on the section header line; empty when the line has no such comment
 		std::string name;
 	};
 
 	class EntryEvent : public Event {
 	public:
+		std::string comment; // trailing comment of the entry line; the reader fills it only when the matched pattern has a trailing-comment group
 		std::string key;
 		std::string subKey;
 		std::string fullKey;
 		std::string value;
 	};
 
+	class CommentEvent : public Event { // Event describing a line that contains only a comment.
+	public:
+		std::string comment; // text following the ;/# marker (whitespace right after the marker is skipped by the reader)
+	};
+
+	class WhitespaceEvent : public Event { // Event describing an empty or whitespace-only line.
+	public:
+		std::string whitespace; // the matched whitespace of that line (may be an empty string)
+	};
+
 	virtual ~INIContentHandler() = default;
 	virtual void startDocument() = 0;
 	virtual void endDocument() = 0;
 	virtual void startSection(const SectionStartEvent& event) = 0;
 	virtual void endSection() = 0;
 	virtual void entry(const EntryEvent& event) = 0;
-};
\ No newline at end of file
+	virtual void comment(const CommentEvent& event) = 0; // invoked by the reader for each comment-only line
+	virtual void whitespace(const WhitespaceEvent& event) = 0; // invoked by the reader for each empty or whitespace-only line
+};
--- a/src/lib/INIReader.cpp	Sun Nov 22 19:25:42 2020 +0100
+++ b/src/lib/INIReader.cpp	Mon Nov 23 16:25:39 2020 +0100
@@ -39,7 +39,7 @@
 
 		std::regex whitespacePattrern("\\s*");
 		std::regex commentPattrern("\\s*(;|#)\\s*(.*)");
-		std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*");
+		std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*((;|#)\\s*(.*))?");
 		std::regex entryQuotesPattrern(/***/"\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*\"([^']+)\"\\s*((;|#)\\s*(.*))?");
 		std::regex entryApostrophesPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
 		std::regex entryPlainPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*(.*)");
@@ -55,9 +55,17 @@
 			lineNumber++;
 
 			if (std::regex_match(line, match, whitespacePattrern)) {
-				// TODO: support also whitespace
+				INIContentHandler::WhitespaceEvent event;
+				event.lineNumber = lineNumber;
+				event.eventNumber = ++eventNumber;
+				event.whitespace = match[0];
+				for (INIContentHandler* handler : handlers) handler->whitespace(event);
 			} else if (std::regex_match(line, match, commentPattrern)) {
-				// TODO: support also comments + emit also the comment style (;/#)
+				INIContentHandler::CommentEvent event;
+				event.lineNumber = lineNumber;
+				event.eventNumber = ++eventNumber;
+				event.comment = match[2];
+				for (INIContentHandler* handler : handlers) handler->comment(event);
 			} else if (std::regex_match(line, match, sectionPattrern)) {
 				if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
 				inSection = true;
@@ -65,7 +73,7 @@
 				event.lineNumber = lineNumber;
 				event.eventNumber = ++eventNumber;
 				event.name = match[1];
-				// TODO: support also comments + emit also the comment style (;/#)
+				event.comment = match[4];
 				for (INIContentHandler* handler : handlers) handler->startSection(event);
 			} else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern) || std::regex_match(line, match, entryPlainPattrern)) {
 				INIContentHandler::EntryEvent event;
@@ -76,13 +84,13 @@
 				event.fullKey = match[1];
 				event.value = match[5];
 				if (match.size() == 9) event.comment = match[8];
-				// TODO: emit also the quote style ('/"/) and surrounding whitespace
 				for (INIContentHandler* handler : handlers) handler->entry(event);
 			} else {
 				// TODO: warning, error, or support unknown content
 			}
 
-			// TODO: probably switch to state-machine approach instead of regular expressions
+			// General features:
+			// TODO: probably switch to state-machine approach instead of regular expressions or use an existing library
 			// TODO: warning/error handler
 			// TODO: support also multiline content (\ + \n)
 			// TODO: support also quoted or multiline keys?
@@ -92,6 +100,12 @@
 			// TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections
 			// TODO: support also option for alternative key-value separator (: instead of =)
 			// TODO: support also other encodings (currently only UTF-8 is supported)
+			
+			// Lossless conversions:
+			// TODO: emit also the quote style ('/"/)
+			// TODO: emit also the comment style (;/#) ?
+			// TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ?
+			// TODO: emit also the line-end type (LF/CRLF) ?
 		}
 
 		if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();