src/lib/INIReader.cpp
author František Kučera <franta-hg@frantovo.cz>
Sun, 22 Nov 2020 01:22:45 +0100
branchv_0
changeset 3 4313e91da50b
parent 2 f031a4dc7c52
child 4 d9b047731e18
permissions -rw-r--r--
add missing endSection() call
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#include <vector>
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    19
#include <regex>
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include "INIReader.h"
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
class INIReaderImpl : public INIReader {
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
private:
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
	std::istream& input;
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
	std::vector<INIContentHandler*> handlers;
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
public:
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
	INIReaderImpl(std::istream& input) : input(input) {
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
	}
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
	void addHandler(INIContentHandler* handler) override {
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
		handlers.push_back(handler);
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
	}
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
	void process() override {
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    37
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    38
		for (INIContentHandler* handler : handlers) handler->startDocument();
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    39
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    40
		std::regex whitespacePattrern("\\s*");
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    41
		std::regex commentPattrern("\\s*(;|#)\\s*(.*)");
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    42
		std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*");
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    43
		std::regex entryQuotesPattrern(/***/"\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*\"([^']+)\"\\s*((;|#)\\s*(.*))?");
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    44
		std::regex entryApostrophesPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?");
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    45
		std::regex entryPlainPattrern("\\s*(([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?)\\s*=\\s*(.*)");
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    46
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    47
		std::smatch match;
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    48
		bool inSection = false;
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    49
		std::string line;
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    50
		int lineNumber = 0;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    51
		int eventNumber = 0;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    52
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    53
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    54
		while (std::getline(input, line)) {
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    55
			lineNumber++;
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    56
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    57
			if (std::regex_match(line, match, whitespacePattrern)) {
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    58
				// TODO: support also whitespace
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    59
			} else if (std::regex_match(line, match, commentPattrern)) {
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    60
				// TODO: support also comments + emit also the comment style (;/#)
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    61
			} else if (std::regex_match(line, match, sectionPattrern)) {
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    62
				if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
3
4313e91da50b add missing endSection() call
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    63
				inSection = true;
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    64
				INIContentHandler::SectionStartEvent event;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    65
				event.lineNumber = lineNumber;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    66
				event.eventNumber = ++eventNumber;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    67
				event.name = match[1];
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    68
				// TODO: support also comments + emit also the comment style (;/#)
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    69
				for (INIContentHandler* handler : handlers) handler->startSection(event);
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    70
			} else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern) || std::regex_match(line, match, entryPlainPattrern)) {
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    71
				INIContentHandler::EntryEvent event;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    72
				event.lineNumber = lineNumber;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    73
				event.eventNumber = ++eventNumber;
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    74
				event.key = match[2];
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    75
				event.subKey = match[4];
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    76
				event.fullKey = match[1];
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    77
				event.value = match[5];
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    78
				if (match.size() == 9) event.comment = match[8];
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    79
				// TODO: emit also the quote style ('/"/) and surrounding whitespace
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    80
				for (INIContentHandler* handler : handlers) handler->entry(event);
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    81
			} else {
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    82
				// TODO: warning, error, or support unknown content
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    83
			}
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    84
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    85
			// TODO: probably switch to state-machine approach instead of regular expressions
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    86
			// TODO: warning/error handler
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    87
			// TODO: support also multiline content (\ + \n)
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    88
			// TODO: support also quoted or multiline keys?
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    89
			// TODO: support also escaped characters
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    90
			// TODO: support also Java .properties and manifest.mf formats?
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    91
			// TODO: support also nested sections – hierarchy
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    92
			// TODO: support also option for alternative key-value separator (: instead of =)
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    93
			// TODO: support also other encodings (currently only UTF-8 is supported)
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    94
		}
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    95
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    96
		if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    97
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    98
		for (INIContentHandler* handler : handlers) handler->endDocument();
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    99
	}
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   100
};
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   101
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   102
/**
 * Factory for the regex-based INI reader implementation.
 *
 * @param input stream to parse; the reader keeps only a reference, so the stream must outlive the reader
 * @return a newly heap-allocated reader; NOTE(review): the caller presumably owns it and must delete it – confirm against INIReader.h
 */
INIReader* INIReader::create(std::istream& input) {
	INIReader* reader = new INIReaderImpl(input);
	return reader;
}