simple INI parser based on regular expressions
patterns taken from alt2xml:
- https://alt2xml.globalcode.info/
- https://hg.frantovo.cz/alt2xml/file/94081a55bf41/java/alt2xml-in-ini/src/cz/frantovo/alt2xml/in/ini/Reader.java#l151
--- a/src/INICommand.cpp Sat Nov 21 18:26:39 2020 +0100
+++ b/src/INICommand.cpp Sat Nov 21 20:09:18 2020 +0100
@@ -68,6 +68,7 @@
vector<AttributeMetadata> metadata;
metadata.push_back({L"section", TypeId::STRING});
metadata.push_back({L"key", TypeId::STRING});
+ metadata.push_back({L"subkey", TypeId::STRING});
metadata.push_back({L"value", TypeId::STRING});
writer->startRelation(configuration.relation, metadata, true);
};
@@ -84,14 +85,15 @@
currentSection.pop_back();
};
- void entry(const std::string& key, const std::string& value) override {
+ void entry(const std::string& key, const std::string& subkey, const std::string& value) override {
writer->writeAttribute(convertor.from_bytes(getCurrentSectionFullName()));
writer->writeAttribute(convertor.from_bytes(key));
+ writer->writeAttribute(convertor.from_bytes(subkey));
writer->writeAttribute(convertor.from_bytes(value));
};
// TODO: handle also comments and whitespace (to allow lossless transformation from INI and back to INI)
- // TODO: handle also subkeys (in [] brackets in the key)
+ // TODO: make subkeys (in [] brackets in the key) optional/configurable
};
--- a/src/lib/INIContentHandler.h Sat Nov 21 18:26:39 2020 +0100
+++ b/src/lib/INIContentHandler.h Sat Nov 21 20:09:18 2020 +0100
@@ -25,5 +25,5 @@
virtual void endDocument() = 0;
virtual void startSection(const std::string& name) = 0;
virtual void endSection() = 0;
- virtual void entry(const std::string& key, const std::string& value) = 0;
+ virtual void entry(const std::string& key, const std::string& subkey, const std::string& value) = 0;
};
\ No newline at end of file
--- a/src/lib/INIReader.cpp Sat Nov 21 18:26:39 2020 +0100
+++ b/src/lib/INIReader.cpp Sat Nov 21 20:09:18 2020 +0100
@@ -16,6 +16,7 @@
*/
#include <vector>
+#include <regex>
#include "INIReader.h"
@@ -33,34 +34,55 @@
}
void process() override {
-
- // TODO: real parser instead of demo data
- for (INIContentHandler* handler : handlers) {
- handler->startDocument();
-
- handler->entry("key-0", "outside sections");
-
- handler->startSection("section-1");
- handler->entry("key-1", "in section 1");
- handler->entry("key-2", "in section 1");
- handler->entry("key-3", "in section 1");
-
- handler->startSection("nested-section-1-1");
- handler->entry("key-1", "in nested section 1-1");
- handler->entry("key-2", "in nested section 1-1");
- handler->endSection();
-
- handler->endSection();
-
- handler->startSection("section-2");
- handler->entry("key-1", "in section 2");
- handler->endSection();
-
- handler->entry("key-666", "outside sections again; this normally would not happen, but should be supported");
-
- handler->endDocument();
+
+ for (INIContentHandler* handler : handlers) handler->startDocument();
+
+ std::regex whitespacePattrern("\\s*");
+ std::regex commentPattrern("\\s*(;|#)\\s*(.*)");
+ std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+?)\\s*\\]\\s*"); // lazy name capture: whitespace before ']' is trimmed by the following \s* instead of ending up in the section name
+ std::regex entryQuotesPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*\"([^\"]+)\"\\s*((;|#)\\s*(.*))?"); // double-quoted value: excludes '"' (not '\''), so apostrophes are allowed inside and the value ends at the first closing quote
+ std::regex entryApostrophesPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
+ std::regex entryPlainPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*(.*)");
+
+ std::smatch match;
+ std::string section;
+ std::string line;
+
+ while (std::getline(input, line)) {
+
+ if (std::regex_match(line, match, whitespacePattrern)) {
+ // TODO: support also whitespace
+ } else if (std::regex_match(line, match, commentPattrern)) {
+ // TODO: support also comments + emit also the comment style (;/#)
+ } else if (std::regex_match(line, match, sectionPattrern)) {
+ if (section.size()) for (INIContentHandler* handler : handlers) handler->endSection();
+ section = match[1];
+ for (INIContentHandler* handler : handlers) handler->startSection(section);
+ } else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern)) {
+ // TODO: support also comments + emit also the comment style (;/#)
+ // TODO: emit also the quote style ('/"/) and surrounding whitespace
+ for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]);
+ } else if (std::regex_match(line, match, entryPlainPattrern)) {
+ for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]);
+ } else {
+ // TODO: warning, error, or support unknown content
+ }
+
+ // TODO: probably switch to state-machine approach instead of regular expressions
+ // TODO: warning/error handler
+ // TODO: support also multiline content (\ + \n)
+ // TODO: support also quoted or multiline keys?
+ // TODO: support also escaped characters
+ // TODO: support also Java .properties and manifest.mf formats?
+ // TODO: support also nested sections – hierarchy
+ // TODO: support also option for alternative key-value separator (: instead of =)
+ // TODO: support also other encodings (currently only UTF-8 is supported)
+ // TODO: emit line numbers and/or event order?
}
-
+
+ if (section.size()) for (INIContentHandler* handler : handlers) handler->endSection();
+
+ for (INIContentHandler* handler : handlers) handler->endDocument();
}
};
--- a/src/lib/INIReader.h Sat Nov 21 18:26:39 2020 +0100
+++ b/src/lib/INIReader.h Sat Nov 21 20:09:18 2020 +0100
@@ -17,6 +17,7 @@
#pragma once
#include <string>
+#include <istream>
#include "INIContentHandler.h"