src/lib/INIReader.cpp
branchv_0
changeset 1 3876a9c56a66
parent 0 16c7fa9b7c49
child 2 f031a4dc7c52
--- a/src/lib/INIReader.cpp	Sat Nov 21 18:26:39 2020 +0100
+++ b/src/lib/INIReader.cpp	Sat Nov 21 20:09:18 2020 +0100
@@ -16,6 +16,7 @@
  */
 
 #include <vector>
+#include <regex>
 
 #include "INIReader.h"
 
@@ -33,34 +34,65 @@
 	}
 
+	/**
+	 * Reads the input line by line, classifies each line with regular expressions and
+	 * reports the result to every handler in `handlers`: startDocument() first, then
+	 * startSection()/entry()/endSection() as lines are recognized, endDocument() last.
+	 * Sections are flat: a new section header closes the currently open section.
+	 * Blank lines, comment lines and unrecognized lines are skipped for now (see TODOs).
+	 */
 	void process() override {
-		
-		// TODO: real parser instead of demo data
-		for (INIContentHandler* handler : handlers) {
-			handler->startDocument();
-			
-			handler->entry("key-0", "outside sections");
-			
-			handler->startSection("section-1");
-			handler->entry("key-1", "in section 1");
-			handler->entry("key-2", "in section 1");
-			handler->entry("key-3", "in section 1");
-			
-			handler->startSection("nested-section-1-1");
-			handler->entry("key-1", "in nested section 1-1");
-			handler->entry("key-2", "in nested section 1-1");
-			handler->endSection();
-			
-			handler->endSection();
-			
-			handler->startSection("section-2");
-			handler->entry("key-1", "in section 2");
-			handler->endSection();
-			
-			handler->entry("key-666", "outside sections again; this normally would not happen, but should be supported");
-			
-			handler->endDocument();
+
+		for (INIContentHandler* handler : handlers) handler->startDocument();
+
+		std::regex whitespacePattern("\\s*");
+		std::regex commentPattern("\\s*(;|#)\\s*(.*)");
+		// The section name is matched lazily so that whitespace before the closing bracket is not part of the name.
+		std::regex sectionPattern("\\s*\\[\\s*([^\\]]+?)\\s*\\]\\s*");
+		// Entry patterns, capture groups: 1 = key, 3 = optional text in square brackets following the key, 4 = value.
+		// A double-quoted value must not contain a double quote (apostrophes are fine inside it) and vice versa.
+		std::regex entryQuotesPattern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*\"([^\"]+)\"\\s*((;|#)\\s*(.*))?");
+		std::regex entryApostrophesPattern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
+		std::regex entryPlainPattern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*(.*)");
+
+		std::smatch match;
+		std::string section; // name of the currently open section; empty = no section is open
+		std::string line;
+
+		while (std::getline(input, line)) {
+
+			if (std::regex_match(line, match, whitespacePattern)) {
+				// TODO: support also whitespace
+			} else if (std::regex_match(line, match, commentPattern)) {
+				// TODO: support also comments + emit also the comment style (;/#)
+			} else if (std::regex_match(line, match, sectionPattern)) {
+				if (!section.empty()) for (INIContentHandler* handler : handlers) handler->endSection();
+				section = match[1];
+				for (INIContentHandler* handler : handlers) handler->startSection(section);
+			} else if (std::regex_match(line, match, entryQuotesPattern) || std::regex_match(line, match, entryApostrophesPattern)) {
+				// TODO: support also comments + emit also the comment style (;/#)
+				// TODO: emit also the quote style ('/"/) and surrounding whitespace
+				for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]);
+			} else if (std::regex_match(line, match, entryPlainPattern)) {
+				for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]);
+			} else {
+				// TODO: warning, error, or support unknown content
+			}
+
+			// TODO: probably switch to state-machine approach instead of regular expressions
+			// TODO: warning/error handler
+			// TODO: support also multiline content (\ + \n)
+			// TODO: support also quoted or multiline keys?
+			// TODO: support also escaped characters
+			// TODO: support also Java .properties and manifest.mf formats?
+			// TODO: support also nested sections – hierarchy
+			// TODO: support also option for alternative key-value separator (: instead of =)
+			// TODO: support also other encodings (currently only UTF-8 is supported)
+			// TODO: emit line numbers and/or event order?
 		}
-		
+
+		if (!section.empty()) for (INIContentHandler* handler : handlers) handler->endSection();
+
+		for (INIContentHandler* handler : handlers) handler->endDocument();
 	}
 };