src/lib/INIReader.cpp
branchv_0
changeset 16 b9a3c806468a
child 19 90f2b8ca32bf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/INIReader.cpp	Sun Nov 22 17:11:12 2020 +0100
@@ -0,0 +1,105 @@
+/**
+ * Relational pipes
+ * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <vector>
+#include <regex>
+
+#include "INIReader.h"
+
+/**
+ * Line-oriented implementation of INIReader based on regular expressions.
+ *
+ * process() reads the input stream line by line and reports sections and
+ * entries to every registered handler, in the order in which the handlers
+ * were added. Blank lines, comment lines and lines that match none of the
+ * patterns are currently skipped without emitting any event.
+ */
+class INIReaderImpl : public INIReader {
+private:
+	std::istream& input;
+	std::vector<INIContentHandler*> handlers;
+public:
+
+	/**
+	 * @param input stream to read the INI content from; only a reference is stored here,
+	 * so the stream has to stay valid until process() has finished
+	 */
+	explicit INIReaderImpl(std::istream& input) : input(input) {
+	}
+
+	/**
+	 * Registers a handler that will receive the events emitted by process().
+	 *
+	 * @param handler stored as-is: it is neither checked for null nor deleted by this class
+	 */
+	void addHandler(INIContentHandler* handler) override {
+		handlers.push_back(handler);
+	}
+
+	/**
+	 * Reads the whole input and emits the events to all handlers:
+	 * startDocument(), then for each recognized line startSection() or entry(),
+	 * and finally endDocument(). endSection() is emitted before the next
+	 * section starts and after the last line, if any section was opened.
+	 *
+	 * Line numbers are 1-based and count every line read; event numbers are
+	 * 1-based and count only section-start and entry events.
+	 */
+	void process() override {
+
+		for (INIContentHandler* handler : handlers) handler->startDocument();
+
+		const std::regex whitespacePattern("\\s*");
+		const std::regex commentPattern("\\s*(;|#)\\s*(.*)");
+		// The name group is lazy, so the whitespace before the closing bracket is trimmed
+		// in the same way as the whitespace behind the opening one.
+		const std::regex sectionPattern("\\s*\\[\\s*([^\\]]+?)\\s*\\]\\s*");
+		// Capture groups of the entry patterns: 1 = full key (including the sub-key part), 2 = key, 4 = sub-key, 5 = value.
+		// The two quoted variants have also: 6 = whole trailing comment, 7 = comment style (; or #), 8 = comment text.
+		// A quoted value must not contain its own quote character, so it ends at the first closing quote
+		// and only whitespace or a comment may follow it.
+		const std::regex entryQuotesPattern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*\"([^\"]+)\"\\s*((;|#)\\s*(.*))?");
+		const std::regex entryApostrophesPattern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
+		const std::regex entryPlainPattern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*(.*)");
+
+		std::smatch match;
+		bool inSection = false;
+		std::string line;
+		int lineNumber = 0;
+		int eventNumber = 0;
+
+		while (std::getline(input, line)) {
+			lineNumber++;
+
+			// std::getline() splits on \n only. The \r of a CRLF line end is dropped here,
+			// because the . in the patterns above does not match it and such lines would not be recognized.
+			if (!line.empty() && line.back() == '\r') line.pop_back();
+
+			if (std::regex_match(line, match, whitespacePattern)) {
+				// TODO: support also whitespace
+			} else if (std::regex_match(line, match, commentPattern)) {
+				// TODO: support also comments + emit also the comment style (;/#)
+			} else if (std::regex_match(line, match, sectionPattern)) {
+				// a new section implicitly closes the previous one
+				if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
+				inSection = true;
+				INIContentHandler::SectionStartEvent event;
+				event.lineNumber = lineNumber;
+				event.eventNumber = ++eventNumber;
+				event.name = match[1];
+				// TODO: support also comments + emit also the comment style (;/#)
+				for (INIContentHandler* handler : handlers) handler->startSection(event);
+			} else if (std::regex_match(line, match, entryQuotesPattern) || std::regex_match(line, match, entryApostrophesPattern) || std::regex_match(line, match, entryPlainPattern)) {
+				// The order matters: the plain pattern would match the quoted forms too (and keep the quotes in the value).
+				INIContentHandler::EntryEvent event;
+				event.lineNumber = lineNumber;
+				event.eventNumber = ++eventNumber;
+				event.key = match[2];
+				event.subKey = match[4];
+				event.fullKey = match[1];
+				event.value = match[5];
+				// only the quoted patterns have the comment groups (9 sub-matches including the whole match)
+				if (match.size() == 9) event.comment = match[8];
+				// TODO: emit also the quote style ('/"/) and surrounding whitespace
+				for (INIContentHandler* handler : handlers) handler->entry(event);
+			} else {
+				// TODO: warning, error, or support unknown content
+			}
+
+			// TODO: probably switch to state-machine approach instead of regular expressions
+			// TODO: warning/error handler
+			// TODO: support also multiline content (\ + \n)
+			// TODO: support also quoted or multiline keys?
+			// TODO: support also escaped characters
+			// TODO: support also Java .properties and manifest.mf formats?
+			// TODO: support also nested sections – hierarchy
+			// TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections
+			// TODO: support also option for alternative key-value separator (: instead of =)
+			// TODO: support also other encodings (currently only UTF-8 is supported)
+		}
+
+		if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
+
+		for (INIContentHandler* handler : handlers) handler->endDocument();
+	}
+};
+
+/**
+ * Factory method: creates the regular-expression based reader implementation.
+ *
+ * @param input stream that the returned reader will read from
+ * @return reader allocated with new; nothing in this file deletes it,
+ * so presumably the caller is responsible for that (TODO confirm against INIReader.h)
+ */
+INIReader* INIReader::create(std::istream& input) {
+	INIReader* reader = new INIReaderImpl(input);
+	return reader;
+}