src/lib/INIReader.cpp
author František Kučera <franta-hg@frantovo.cz>
Thu, 26 Nov 2020 18:52:49 +0100
branchv_0
changeset 22 29d673a54ecf
parent 21 b35baebf5005
child 23 b497140b0b63
permissions -rw-r--r--
prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#include <vector>
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    19
#include <regex>
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    20
#include <sstream>
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    21
#include <stdexcept>
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include "INIReader.h"
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    25
namespace relpipe {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    26
namespace in {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    27
namespace ini {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    28
namespace lib {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    29
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
class INIReaderImpl : public INIReader {
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
private:
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
	std::istream& input;
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
	std::vector<INIContentHandler*> handlers;
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    34
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    35
	/** 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    36
	 * This might be configurable.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    37
	 * 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    38
	 * By default, we ignore all leading whitespace on continuing lines.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    39
	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    40
	 * If a line break is desired, it should be written as \n (escaped) or the value should be quoted in " or '.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    41
	 * 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    42
	 * Related specifications:
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    43
	 *  - https://docs.oracle.com/javase/8/docs/api/index.html?java/util/Properties.html
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    44
	 */
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    45
	bool consumeLeadingSpacesOnContinuingLines = true;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    46
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    47
	/**
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    48
	 * This might be configurable.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    49
	 * 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    50
	 * KDE uses some weird INI dialect that allows [section][x] syntax where „x“ is kind of „tag“ that signalizes some properties of given section.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    51
	 * Line „[section_1][$i]“ means that the „section_1“ is „locked“.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    52
	 * We may emit this information somehow later, but for now, it is just ignored.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    53
	 * 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    54
	 * TODO: Is „section tag“ right name?
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    55
	 * 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    56
	 * Related specifications:
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    57
	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Lock_Down
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    58
	 */
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    59
	bool allowSectionTags = true;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    60
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    61
	/**
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    62
	 * This might be configurable.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    63
	 * 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    64
	 * If whole key is „aaa[bbb]“ then „aaa“ is considered to be the key and „bbb“ the sub-key.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    65
	 * No \[ escaping is currently supported, so the key cannot contain the bracket character.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    66
	 * 
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    67
	 * Related specifications:
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    68
	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Shell_Expansion
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    69
	 *  - https://specifications.freedesktop.org/desktop-entry-spec/latest/ar01s05.html
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    70
	 */
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    71
	bool allowSubKeys = true;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
    72
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    73
	/**
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    74
	 * This might be configurable.
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    75
	 * 
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    76
	 * Classic INI uses „key=value“ syntax.
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    77
	 * But some other formats/dialects might use key:value.
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    78
	 * 
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    79
	 * Only single character separators are supported.
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    80
	 * If multiple separators should be recognized (e.g. both „=“ and „:“), this string will contain all of them,
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    81
	 * i.e. „:=“ does not mean the „key:=value“ syntax, but either „key=value“ or „key:value“.
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    82
	 */
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    83
	std::string keyValueSeparators = "=";
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    84
21
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    85
	/**
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    86
	 * This might be configurable.
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    87
	 * 
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    88
	 * Classic INI uses „; comment“ syntax.
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    89
	 * But many existing files contain „# comment“ lines.
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    90
	 * 
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    91
	 * Only single character separators are supported (works same as keyValueSeparators).
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    92
	 */
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    93
	std::string commentSeparators = ";#";
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
    94
22
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
    95
	/**
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
    96
	 * This might be configurable.
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
    97
	 * 
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
    98
	 * INI files often support both the "quotes" and the 'apostrophes' style.
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
    99
	 * But some dialects may support only one of them or not support quoting at all.
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   100
	 * 
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   101
	 * In such a case, e.g. „key="some value"“ would mean that the value is „"some value"“ (including the quotes).
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   102
	 * Thus it is important to allow disabling quote recognizing (which is done by setting this parameter to empty string).
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   103
	 * 
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   104
	 * Only single character quotes are supported (works same as keyValueSeparators).
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   105
	 */
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   106
	std::string quotes = "\"'";
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   107
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   108
	int lineNumber = 1;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   109
	int eventNumber = 0;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   110
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   111
	/**
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   112
	 * Should be always used instead of input.peek().
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   113
	 * Skips \r.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   114
	 */
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   115
	char peek() {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   116
		// In 2020 there is no need to manually return the carriage. However some legacy systems still do it.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   117
		char ch = input.peek();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   118
		if (ch == '\r') {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   119
			input.get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   120
			ch = input.peek();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   121
		}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   122
		return ch;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   123
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   124
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   125
	/**
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   126
	 * Should be always used instead of input.get().
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   127
	 * Counts the lines and skips \r.
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   128
	 */
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   129
	char get() {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   130
		char ch = input.get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   131
		if (ch == '\n') lineNumber++;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   132
		else if (ch == '\r') ch = get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   133
		return ch;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   134
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   135
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   136
	std::string readSpacesAndTabs() {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   137
		std::stringstream result;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   138
		for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t'); ch = peek()) result.put(get());
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   139
		return result.str();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   140
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   141
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   142
	std::string readAllWhitespace() {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   143
		std::stringstream result;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   144
		for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); ch = peek()) result.put(get());
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   145
		return result.str();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   146
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   147
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   148
	void processContinuingLine(std::stringstream& result) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   149
		if (consumeLeadingSpacesOnContinuingLines) readSpacesAndTabs();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   150
		else result.put('\n');
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   151
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   152
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   153
	std::string readUntil(const char until, bool* found = nullptr) {
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   154
		return readUntil(std::string(1, until), found);
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   155
	}
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   156
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   157
	std::string readUntil(const std::string& until, bool* found = nullptr) {
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   158
		std::stringstream result;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   159
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   160
		for (char ch = peek(); input.good() && !oneOf(ch, until); ch = peek()) {
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   161
			if (ch == '\\') {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   162
				get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   163
				ch = get();
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   164
				if (oneOf(ch, until) && ch == '\n') processContinuingLine(result);
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   165
				else if (oneOf(ch, until)) result.put(ch);
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   166
				else if (ch == std::istream::traits_type::eof()) break;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   167
				else result.put('\\').put(ch);
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   168
				// unescaping is done in two phases:
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   169
				// here we unescape just the \n (LF)
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   170
				// other escape sequences are leaved untouched and will be processed in later phases, see see UnescapingINIHandler
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   171
			} else {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   172
				ch = get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   173
				result.put(ch);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   174
			}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   175
		}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   176
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   177
		if (oneOf(peek(), until)) {
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   178
			get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   179
			if (found) *found = true;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   180
		} else {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   181
			if (found) *found = false;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   182
		}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   183
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   184
		return result.str();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   185
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   186
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   187
	std::string readToken(const char until, char* quote = nullptr, bool* found = nullptr) {
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   188
		return readToken(std::string(1, until), quote, found);
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   189
	}
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   190
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   191
	std::string readToken(const std::string& until, char* quote = nullptr, bool* found = nullptr) {
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   192
		std::string result;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   193
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   194
		char ch = peek();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   195
		if (isQuote(ch)) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   196
			if (quote) *quote = ch;
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   197
			result = readUntil(std::string(1, get()), found);
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   198
		} else {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   199
			if (quote) *quote = 0;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   200
			result = readUntil(until, found);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   201
		}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   202
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   203
		return result;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   204
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   205
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   206
	std::string readTokenAndEatTerminator(char until, char* quote = nullptr, bool* found = nullptr) {
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   207
		return readTokenAndEatTerminator(std::string(1, until), quote, found);
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   208
	}
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   209
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   210
	std::string readTokenAndEatTerminator(const std::string& until, char* quote = nullptr, bool* found = nullptr) {
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   211
		std::string result = readToken(until, quote, found);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   212
		if (*quote) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   213
			readAllWhitespace();
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   214
			if (!oneOf(get(), until)) throw std::logic_error(std::string("missing „") + until + "“ after quoted section name");
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   215
		}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   216
		return result;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   217
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   218
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   219
	bool isComment(char ch) {
21
b35baebf5005 prepare for multiple and configurable comment separators (e.g. „; comment“ and „# comment“)
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   220
		return oneOf(ch, commentSeparators);
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   221
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   222
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   223
	bool isQuote(char ch) {
22
29d673a54ecf prepare for multiple and configurable quotes (e.g. „key="value"“ and „key='value'“ or disabling quote support at all)
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   224
		return oneOf(ch, quotes);
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   225
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   226
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   227
	/**
	 * Tests membership of a character in a set given as a string.
	 *
	 * @param ch character to be evaluated
	 * @param options list of options (characters)
	 * @return whether ch is one of options; always false for empty options
	 */
	bool oneOf(char ch, const std::string& options) {
		for (const char option : options) {
			if (option == ch) return true;
		}
		return false;
	}
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   235
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   236
	/**
	 * Strips leading and trailing whitespace (space, tab, line feed, vertical tab, form feed,
	 * carriage return). Whitespace inside the text is kept as it is.
	 *
	 * Implemented with plain string searches: this is called for every unquoted key and value
	 * and compiling a regular expression on each call is needlessly expensive.
	 *
	 * @param s text to be trimmed
	 * @return s without the surrounding whitespace; empty string if s contains only whitespace
	 */
	std::string trim(std::string s) {
		const char* const whitespace = " \t\n\v\f\r";

		const std::string::size_type first = s.find_first_not_of(whitespace);
		if (first == std::string::npos) return std::string();

		// A non-whitespace character exists, so this search cannot fail.
		const std::string::size_type last = s.find_last_not_of(whitespace);
		return s.substr(first, last - first + 1);
	}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   239
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   240
public:
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   241
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   242
	INIReaderImpl(std::istream& input) : input(input) {
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   243
	}
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   244
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   245
	/**
	 * Registers a handler; process() notifies the handlers in the order they were added.
	 *
	 * @param handler receiver of the parser events; only the pointer is stored here
	 */
	void addHandler(INIContentHandler* handler) override {
		handlers.emplace_back(handler);
	}
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   248
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   249
	void process() override {
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   250
		for (INIContentHandler* handler : handlers) handler->startDocument();
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   251
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   252
		bool inSection = false;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   253
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   254
		while (input.good()) { // TODO: condition
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   255
			{
19
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   256
				INIContentHandler::WhitespaceEvent event;
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   257
				event.lineNumber = lineNumber;
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   258
				std::string whitespace = readAllWhitespace();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   259
				if (whitespace.size()) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   260
					event.eventNumber = ++eventNumber;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   261
					event.whitespace = whitespace;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   262
					for (INIContentHandler* handler : handlers) handler->whitespace(event);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   263
				}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   264
			}
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   265
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   266
			bool found;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   267
			char quote;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   268
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   269
			char ch = peek();
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   270
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   271
			if (ch == std::istream::traits_type::eof()) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   272
				break;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   273
			} else if (ch == '[') {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   274
				if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   275
				inSection = true;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   276
				INIContentHandler::SectionStartEvent event;
6
fb717cfbfea1 improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   277
				event.lineNumber = lineNumber;
fb717cfbfea1 improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   278
				event.eventNumber = ++eventNumber;
19
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   279
				get();
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   280
				readAllWhitespace();
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   281
				event.name = readTokenAndEatTerminator(']', &quote, &found);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   282
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   283
				readSpacesAndTabs();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   284
				if (allowSectionTags && peek() == '[') {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   285
					get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   286
					event.tag = readTokenAndEatTerminator(']', &quote, &found);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   287
				}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   288
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   289
				readSpacesAndTabs();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   290
				ch = peek();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   291
				if (isComment(ch)) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   292
					get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   293
					readSpacesAndTabs();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   294
					event.comment = readUntil('\n', &found);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   295
				} else if (ch == '\n') {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   296
					get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   297
				} else {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   298
					throw std::logic_error(std::string("unexpected content after the section: '") + event.name + "'");
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   299
				}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   300
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   301
				for (INIContentHandler* handler : handlers) handler->startSection(event);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   302
			} else if (isComment(ch)) {
6
fb717cfbfea1 improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   303
				INIContentHandler::CommentEvent event;
fb717cfbfea1 improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   304
				event.lineNumber = lineNumber;
fb717cfbfea1 improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   305
				event.eventNumber = ++eventNumber;
19
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   306
				get();
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   307
				readSpacesAndTabs();
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   308
				event.comment = readUntil('\n', &found);
6
fb717cfbfea1 improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
   309
				for (INIContentHandler* handler : handlers) handler->comment(event);
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   310
			} else {
19
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   311
				INIContentHandler::EntryEvent event;
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   312
				event.lineNumber = lineNumber;
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   313
				event.eventNumber = ++eventNumber;
967f73af64a4 start event numbers with 1
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   314
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   315
				std::string fullKey = readToken(keyValueSeparators, &quote, &found);
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   316
				if (!found) throw std::logic_error(std::string("missing = after key: '") + fullKey + "'");
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   317
				if (!quote) fullKey = trim(fullKey);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   318
				readSpacesAndTabs();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   319
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   320
				if (quote) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   321
					ch = get();
20
9187f0439ca9 prepare for multiple and configurable key-value separators (e.g. key=value and key:value)
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   322
					if (oneOf(ch, keyValueSeparators)) readSpacesAndTabs();
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   323
					else throw std::logic_error(std::string("missing = after quoted key: '") + fullKey + "'");
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   324
				}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   325
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   326
				std::string value = readToken('\n', &quote, &found);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   327
				if (!quote) value = trim(value);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   328
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   329
				event.key = fullKey;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   330
				event.fullKey = fullKey;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   331
				event.value = value;
14
ea431c469403 multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   332
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   333
				if (allowSubKeys) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   334
					std::smatch match;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   335
					if (std::regex_match(fullKey, match, std::regex("([^\\[]+)\\[([^\\[]+)\\]"))) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   336
						event.key = match[1];
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   337
						event.subKey = match[2];
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   338
						event.fullKey = fullKey;
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   339
					}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   340
				}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   341
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   342
				if (quote) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   343
					readSpacesAndTabs();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   344
					ch = peek();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   345
					if (isComment(ch)) {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   346
						get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   347
						readSpacesAndTabs();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   348
						event.comment = readUntil('\n', &found);
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   349
					} else if (ch == '\n') {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   350
						get();
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   351
					} else {
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   352
						throw std::logic_error(std::string("unexpected content after the quoted value: key='") + fullKey + "' value='" + event.value + "'");
14
ea431c469403 multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   353
					}
ea431c469403 multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   354
				}
ea431c469403 multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   355
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   356
				for (INIContentHandler* handler : handlers) handler->entry(event);
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   357
			}
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   358
		}
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   359
		// TODO: error at the end, catch premature/unexpected EOF
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   360
		// TODO: unescape + trim values + ignore \r
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   361
		// TODO: count lines
2
f031a4dc7c52 add options: --enable-sections --enable-subkeys --enable-comments --enable-line-numbers --enable-event-numbers
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   362
		if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
1
3876a9c56a66 simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   363
		for (INIContentHandler* handler : handlers) handler->endDocument();
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   364
	}
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   365
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   366
	// General features:
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   367
	// TODO: warning/error handler
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   368
	// TODO: support also escaped characters
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   369
	// TODO: support also Java .properties and manifest.mf formats?
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   370
	// TODO: support also nested sections – hierarchy
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   371
	// TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   372
	// TODO: support also option for alternative key-value separator (: instead of =)
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   373
	// TODO: support also other encodings (currently only UTF-8 is supported)
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   374
	// TODO: better exceptions
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   375
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   376
	// Lossless conversions:
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   377
	// TODO: emit also the quote style ('/"/)
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   378
	// TODO: emit also the comment style (;/#) ?
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   379
	// TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ?
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   380
	// TODO: emit also the line-end type (LF/CRLF) ?
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   381
0
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   382
};
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   383
16c7fa9b7c49 project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   384
/**
 * Factory method: creates the default reader implementation bound to the given stream.
 *
 * @param input stream to read the INI content from
 * @return a reader allocated with „new“; presumably the caller is responsible for deleting it – TODO confirm
 */
INIReader* INIReader::create(std::istream& input) {
	INIReader* reader = new INIReaderImpl(input);
	return reader;
}
16
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   387
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   388
}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   389
}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   390
}
db994a2ddffa new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 14
diff changeset
   391
}