# HG changeset patch
# User František Kučera
# Date 1605985758 -3600
# Node ID 3876a9c56a6601bf8a1a8521d882b0c92a86d627
# Parent 16c7fa9b7c499782264a1100fe6ab3565dac1117
simple INI parser based on regular expressions

patterns taken from alt2xml:
 - https://alt2xml.globalcode.info/
 - https://hg.frantovo.cz/alt2xml/file/94081a55bf41/java/alt2xml-in-ini/src/cz/frantovo/alt2xml/in/ini/Reader.java#l151

diff -r 16c7fa9b7c49 -r 3876a9c56a66 src/INICommand.cpp
--- a/src/INICommand.cpp	Sat Nov 21 18:26:39 2020 +0100
+++ b/src/INICommand.cpp	Sat Nov 21 20:09:18 2020 +0100
@@ -68,6 +68,7 @@
 		vector metadata;
 		metadata.push_back({L"section", TypeId::STRING});
 		metadata.push_back({L"key", TypeId::STRING});
+		metadata.push_back({L"subkey", TypeId::STRING});
 		metadata.push_back({L"value", TypeId::STRING});
 		writer->startRelation(configuration.relation, metadata, true);
 	};
@@ -84,14 +85,15 @@
 		currentSection.pop_back();
 	};
 
-	void entry(const std::string& key, const std::string& value) override {
+	void entry(const std::string& key, const std::string& subkey, const std::string& value) override {
 		writer->writeAttribute(convertor.from_bytes(getCurrentSectionFullName()));
 		writer->writeAttribute(convertor.from_bytes(key));
+		writer->writeAttribute(convertor.from_bytes(subkey));
 		writer->writeAttribute(convertor.from_bytes(value));
 	};
 
 	// TODO: handle also comments and whitespace (to allow lossless transformation from INI and back to INI)
-	// TODO: handle also subkeys (in [] brackets in the key)
+	// TODO: make subkeys (in [] brackets in the key) optional/configurable
 
 };
 
diff -r 16c7fa9b7c49 -r 3876a9c56a66 src/lib/INIContentHandler.h
--- a/src/lib/INIContentHandler.h	Sat Nov 21 18:26:39 2020 +0100
+++ b/src/lib/INIContentHandler.h	Sat Nov 21 20:09:18 2020 +0100
@@ -25,5 +25,5 @@
 	virtual void endDocument() = 0;
 	virtual void startSection(const std::string& name) = 0;
 	virtual void endSection() = 0;
-	virtual void entry(const std::string& key, const std::string& value) = 0;
+	virtual void entry(const std::string& key, const std::string& subkey, const std::string& value) = 0;
 };
\ No newline at end of file
diff -r 16c7fa9b7c49 -r 3876a9c56a66 src/lib/INIReader.cpp
--- a/src/lib/INIReader.cpp	Sat Nov 21 18:26:39 2020 +0100
+++ b/src/lib/INIReader.cpp	Sat Nov 21 20:09:18 2020 +0100
@@ -16,6 +16,7 @@
  */
 
 #include
+#include
 
 #include "INIReader.h"
 
@@ -33,34 +34,56 @@
 	}
 
 	void process() override {
-
-		// TODO: real parser instead of demo data
-		for (INIContentHandler* handler : handlers) {
-			handler->startDocument();
-
-			handler->entry("key-0", "outside sections");
-
-			handler->startSection("section-1");
-			handler->entry("key-1", "in section 1");
-			handler->entry("key-2", "in section 1");
-			handler->entry("key-3", "in section 1");
-
-			handler->startSection("nested-section-1-1");
-			handler->entry("key-1", "in nested section 1-1");
-			handler->entry("key-2", "in nested section 1-1");
-			handler->endSection();
-
-			handler->endSection();
-
-			handler->startSection("section-2");
-			handler->entry("key-1", "in section 2");
-			handler->endSection();
-
-			handler->entry("key-666", "outside sections again; this normally would not happen, but should be supported");
-
-			handler->endDocument();
+
+		for (INIContentHandler* handler : handlers) handler->startDocument();
+
+		std::regex whitespacePattrern("\\s*");
+		std::regex commentPattrern("\\s*(;|#)\\s*(.*)");
+		std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*");
+		// capture groups of the three entry patterns: 1 = key, 3 = subkey (optional, in [] brackets), 4 = value
+		std::regex entryQuotesPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*\"([^\"]+)\"\\s*((;|#)\\s*(.*))?");
+		std::regex entryApostrophesPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
+		std::regex entryPlainPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*(.*)");
+
+		std::smatch match;
+		std::string section;
+		std::string line;
+
+		while (std::getline(input, line)) {
+
+			if (std::regex_match(line, match, whitespacePattrern)) {
+				// TODO: support also whitespace
+			} else if (std::regex_match(line, match, commentPattrern)) {
+				// TODO: support also comments + emit also the comment style (;/#)
+			} else if (std::regex_match(line, match, sectionPattrern)) {
+				if (section.size()) for (INIContentHandler* handler : handlers) handler->endSection();
+				section = match[1];
+				for (INIContentHandler* handler : handlers) handler->startSection(section);
+			} else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern)) {
+				// TODO: support also comments + emit also the comment style (;/#)
+				// TODO: emit also the quote style ('/"/) and surrounding whitespace
+				for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]);
+			} else if (std::regex_match(line, match, entryPlainPattrern)) {
+				for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]);
+			} else {
+				// TODO: warning, error, or support unknown content
+			}
+
+			// TODO: probably switch to state-machine approach instead of regular expressions
+			// TODO: warning/error handler
+			// TODO: support also multiline content (\ + \n)
+			// TODO: support also quoted or multiline keys?
+			// TODO: support also escaped characters
+			// TODO: support also Java .properties and manifest.mf formats?
+			// TODO: support also nested sections – hierarchy
+			// TODO: support also option for alternative key-value separator (: instead of =)
+			// TODO: support also other encodings (currently only UTF-8 is supported)
+			// TODO: emit line numbers and/or event order?
 		}
-
+
+		if (section.size()) for (INIContentHandler* handler : handlers) handler->endSection();
+
+		for (INIContentHandler* handler : handlers) handler->endDocument();
 	}
 };
 
diff -r 16c7fa9b7c49 -r 3876a9c56a66 src/lib/INIReader.h
--- a/src/lib/INIReader.h	Sat Nov 21 18:26:39 2020 +0100
+++ b/src/lib/INIReader.h	Sat Nov 21 20:09:18 2020 +0100
@@ -17,6 +17,7 @@
 #pragma once
 
 #include
+#include
 
 #include "INIContentHandler.h"
 