improved support for comments and whitespace v_0
author František Kučera <franta-hg@frantovo.cz>
Mon, 23 Nov 2020 16:25:57 +0100
branch v_0
changeset 6 fb717cfbfea1
parent 5 d70ea23682aa
child 7 95b21edc9519
improved support for comments and whitespace
bash-completion.sh
src/CLIParser.h
src/Configuration.h
src/INICommand.cpp
src/lib/INIContentHandler.h
src/lib/INIReader.cpp
--- a/bash-completion.sh	Sun Nov 22 19:22:25 2020 +0100
+++ b/bash-completion.sh	Mon Nov 23 16:25:57 2020 +0100
@@ -31,6 +31,7 @@
 	elif [[ "$w1" == "--enable-sections"                                ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	elif [[ "$w1" == "--enable-sub-keys"                                ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	elif [[ "$w1" == "--enable-comments"                                ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+	elif [[ "$w1" == "--enable-whitespace"                              ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	elif [[ "$w1" == "--enable-line-numbers"                            ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	elif [[ "$w1" == "--enable-event-numbers"                           ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
 	else
@@ -39,6 +40,7 @@
 			"--enable-sections"
 			"--enable-sub-keys"
 			"--enable-comments"
+			"--enable-whitespace"
 			"--enable-line-numbers"
 			"--enable-event-numbers"
 		)
--- a/src/CLIParser.h	Sun Nov 22 19:22:25 2020 +0100
+++ b/src/CLIParser.h	Mon Nov 23 16:25:57 2020 +0100
@@ -52,6 +52,7 @@
 	static const relpipe::writer::string_t OPTION_ENABLE_SECTIONS;
 	static const relpipe::writer::string_t OPTION_ENABLE_SUB_KEYS;
 	static const relpipe::writer::string_t OPTION_ENABLE_COMMENTS;
+	static const relpipe::writer::string_t OPTION_ENABLE_WHITESPACE;
 	static const relpipe::writer::string_t OPTION_ENABLE_LINE_NUMBERS;
 	static const relpipe::writer::string_t OPTION_ENABLE_EVENT_NUMBERS;
 
@@ -65,6 +66,7 @@
 			else if (option == OPTION_ENABLE_SECTIONS) c.enableSections = parseBoolean(readNext(arguments, i));
 			else if (option == OPTION_ENABLE_SUB_KEYS) c.enableSubKeys = parseBoolean(readNext(arguments, i));
 			else if (option == OPTION_ENABLE_COMMENTS) c.enableComments = parseBoolean(readNext(arguments, i));
+			else if (option == OPTION_ENABLE_WHITESPACE) c.enableWhitespace = parseBoolean(readNext(arguments, i));
 			else if (option == OPTION_ENABLE_LINE_NUMBERS) c.enableLineNumbers = parseBoolean(readNext(arguments, i));
 			else if (option == OPTION_ENABLE_EVENT_NUMBERS) c.enableEventNumbers = parseBoolean(readNext(arguments, i));
 			else throw relpipe::cli::RelpipeCLIException(L"Unsupported CLI option: " + option, relpipe::cli::CLI::EXIT_CODE_BAD_CLI_ARGUMENTS);
@@ -81,6 +83,7 @@
 const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SECTIONS = L"--enable-sections";
 const relpipe::writer::string_t CLIParser::OPTION_ENABLE_SUB_KEYS = L"--enable-sub-keys";
 const relpipe::writer::string_t CLIParser::OPTION_ENABLE_COMMENTS = L"--enable-comments";
+const relpipe::writer::string_t CLIParser::OPTION_ENABLE_WHITESPACE = L"--enable-whitespace";
 const relpipe::writer::string_t CLIParser::OPTION_ENABLE_LINE_NUMBERS = L"--enable-line-numbers";
 const relpipe::writer::string_t CLIParser::OPTION_ENABLE_EVENT_NUMBERS = L"--enable-event-numbers";
 
--- a/src/Configuration.h	Sun Nov 22 19:22:25 2020 +0100
+++ b/src/Configuration.h	Mon Nov 23 16:25:57 2020 +0100
@@ -34,6 +34,7 @@
 	relpipe::writer::boolean_t enableSections = true;
 	relpipe::writer::boolean_t enableSubKeys = false;
 	relpipe::writer::boolean_t enableComments = false;
+	relpipe::writer::boolean_t enableWhitespace = false;
 
 	virtual ~Configuration() {
 	}
--- a/src/INICommand.cpp	Sun Nov 22 19:22:25 2020 +0100
+++ b/src/INICommand.cpp	Mon Nov 23 16:25:57 2020 +0100
@@ -73,6 +73,7 @@
 		if (configuration.enableSubKeys) metadata.push_back({L"sub_key", TypeId::STRING});
 		metadata.push_back({L"value", TypeId::STRING});
 		if (configuration.enableComments) metadata.push_back({L"comment", TypeId::STRING});
+		if (configuration.enableWhitespace) metadata.push_back({L"whitespace", TypeId::STRING});
 		writer->startRelation(configuration.relation, metadata, true);
 	};
 
@@ -102,9 +103,47 @@
 
 		writer->writeAttribute(convertor.from_bytes(event.value));
 		if (configuration.enableComments) writer->writeAttribute(convertor.from_bytes(event.comment));
+		if (configuration.enableWhitespace) writer->writeAttribute(L"");
 	};
 
-	// TODO: handle also comments and whitespace (to allow lossless transformation from INI and back to INI)
+	void comment(const CommentEvent& event) override {
+		if (configuration.enableComments) {
+			if (configuration.enableLineNumbers) writer->writeAttribute(&event.lineNumber, typeid (event.lineNumber));
+			if (configuration.enableEventNumbers) writer->writeAttribute(&event.eventNumber, typeid (event.eventNumber));
+
+			std::string section = getCurrentSectionFullName();
+			std::string key;
+
+			if (configuration.enableSections) writer->writeAttribute(convertor.from_bytes(section));
+			else if (section.size()) key = section + "/";
+			writer->writeAttribute(convertor.from_bytes(key));
+
+			if (configuration.enableSubKeys) writer->writeAttribute(L"");
+			writer->writeAttribute(L""); // value
+			writer->writeAttribute(convertor.from_bytes(event.comment));
+			if (configuration.enableWhitespace) writer->writeAttribute(L"");
+		}
+	}
+
+	void whitespace(const WhitespaceEvent& event) override {
+		if (configuration.enableWhitespace) {
+			if (configuration.enableLineNumbers) writer->writeAttribute(&event.lineNumber, typeid (event.lineNumber));
+			if (configuration.enableEventNumbers) writer->writeAttribute(&event.eventNumber, typeid (event.eventNumber));
+
+			std::string section = getCurrentSectionFullName();
+			std::string key;
+
+			if (configuration.enableSections) writer->writeAttribute(convertor.from_bytes(section));
+			else if (section.size()) key = section + "/";
+			writer->writeAttribute(convertor.from_bytes(key));
+
+			if (configuration.enableSubKeys) writer->writeAttribute(L"");
+			writer->writeAttribute(L""); // value
+			if (configuration.enableComments) writer->writeAttribute(L"");
+			writer->writeAttribute(convertor.from_bytes(event.whitespace));
+		}
+	}
+	// TODO: unify methods, DRY
 
 };
 
--- a/src/lib/INIContentHandler.h	Sun Nov 22 19:22:25 2020 +0100
+++ b/src/lib/INIContentHandler.h	Mon Nov 23 16:25:57 2020 +0100
@@ -25,26 +25,39 @@
 	public:
 		int64_t eventNumber = -1;
 		int64_t lineNumber = -1;
-		std::string comment;
 	};
 
 	class SectionStartEvent : public Event {
 	public:
+		std::string comment;
 		std::string name;
 	};
 
 	class EntryEvent : public Event {
 	public:
+		std::string comment;
 		std::string key;
 		std::string subKey;
 		std::string fullKey;
 		std::string value;
 	};
 
+	class CommentEvent : public Event {
+	public:
+		std::string comment;
+	};
+
+	class WhitespaceEvent : public Event {
+	public:
+		std::string whitespace;
+	};
+
 	virtual ~INIContentHandler() = default;
 	virtual void startDocument() = 0;
 	virtual void endDocument() = 0;
 	virtual void startSection(const SectionStartEvent& event) = 0;
 	virtual void endSection() = 0;
 	virtual void entry(const EntryEvent& event) = 0;
-};
\ No newline at end of file
+	virtual void comment(const CommentEvent& event) = 0;
+	virtual void whitespace(const WhitespaceEvent& event) = 0;
+};
--- a/src/lib/INIReader.cpp	Sun Nov 22 19:22:25 2020 +0100
+++ b/src/lib/INIReader.cpp	Mon Nov 23 16:25:57 2020 +0100
@@ -39,7 +39,7 @@
 
 		std::regex whitespacePattrern("\\s*");
 		std::regex commentPattrern("\\s*(;|#)\\s*(.*)");
-		std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*");
+		std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*((;|#)\\s*(.*))?");
 		std::regex entryQuotesPattrern(/***/"\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*\"([^']+)\"\\s*((;|#)\\s*(.*))?");
 		std::regex entryApostrophesPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
 		std::regex entryPlainPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*(.*)");
@@ -55,9 +55,17 @@
 			lineNumber++;
 
 			if (std::regex_match(line, match, whitespacePattrern)) {
-				// TODO: support also whitespace
+				INIContentHandler::WhitespaceEvent event;
+				event.lineNumber = lineNumber;
+				event.eventNumber = ++eventNumber;
+				event.whitespace = match[0];
+				for (INIContentHandler* handler : handlers) handler->whitespace(event);
 			} else if (std::regex_match(line, match, commentPattrern)) {
-				// TODO: support also comments + emit also the comment style (;/#)
+				INIContentHandler::CommentEvent event;
+				event.lineNumber = lineNumber;
+				event.eventNumber = ++eventNumber;
+				event.comment = match[2];
+				for (INIContentHandler* handler : handlers) handler->comment(event);
 			} else if (std::regex_match(line, match, sectionPattrern)) {
 				if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
 				inSection = true;
@@ -65,7 +73,7 @@
 				event.lineNumber = lineNumber;
 				event.eventNumber = ++eventNumber;
 				event.name = match[1];
-				// TODO: support also comments + emit also the comment style (;/#)
+				event.comment = match[4];
 				for (INIContentHandler* handler : handlers) handler->startSection(event);
 			} else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern) || std::regex_match(line, match, entryPlainPattrern)) {
 				INIContentHandler::EntryEvent event;
@@ -76,13 +84,13 @@
 				event.fullKey = match[1];
 				event.value = match[5];
 				if (match.size() == 9) event.comment = match[8];
-				// TODO: emit also the quote style ('/"/) and surrounding whitespace
 				for (INIContentHandler* handler : handlers) handler->entry(event);
 			} else {
 				// TODO: warning, error, or support unknown content
 			}
 
-			// TODO: probably switch to state-machine approach instead of regular expressions
+			// General features:
+			// TODO: probably switch to state-machine approach instead of regular expressions or use an existing library
 			// TODO: warning/error handler
 			// TODO: support also multiline content (\ + \n)
 			// TODO: support also quoted or multiline keys?
@@ -92,6 +100,12 @@
 			// TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections
 			// TODO: support also option for alternative key-value separator (: instead of =)
 			// TODO: support also other encodings (currently only UTF-8 is supported)
+			
+			// Lossless conversions:
+			// TODO: emit also the quote style ('/"/)
+			// TODO: emit also the comment style (;/#) ?
+			// TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ?
+			// TODO: emit also the line-end type (LF/CRLF) ?
 		}
 
 		if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();