src/lib/INIReader.cpp
author František Kučera <franta-hg@frantovo.cz>
Fri, 26 Aug 2022 22:41:55 +0200
branchv_0
changeset 37 d89f621951ae
parent 33 c9a158da6c32
permissions -rw-r--r--
fix typo: enbaled → enabled (thanks Jiří Wolker for reporting)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#include <vector>
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <regex>
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    20
#include <sstream>
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    21
#include <stdexcept>
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include "INIReader.h"
29
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    24
#include "uri.h"
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    26
namespace relpipe {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    27
namespace in {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    28
namespace ini {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    29
namespace lib {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    30
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
class INIReaderImpl : public INIReader {
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
private:
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
	std::istream& input;
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
	std::vector<INIContentHandler*> handlers;
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    35
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    36
	class ConfiguredUnescapingProcessor {
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    37
	public:
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    38
		std::shared_ptr<UnescapingProcessor> processor;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    39
		const std::string uri;
37
d89f621951ae fix typo: enbaled → enabled
František Kučera <franta-hg@frantovo.cz>
parents: 33
diff changeset
    40
		bool enabled;
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    41
37
d89f621951ae fix typo: enbaled → enabled
František Kučera <franta-hg@frantovo.cz>
parents: 33
diff changeset
    42
		ConfiguredUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabled) : processor(processor), uri(uri), enabled(enabled) {
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    43
		}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    44
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    45
	};
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    46
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    47
	std::vector<ConfiguredUnescapingProcessor> unescapingProcessors;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    48
29
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    49
	class ConfiguredDialect {
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    50
	public:
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    51
		std::shared_ptr<Dialect> dialect;
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    52
		const std::string uri;
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    53
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    54
		ConfiguredDialect(std::shared_ptr<Dialect> dialect, const std::string uri) : dialect(dialect), uri(uri) {
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    55
		}
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    56
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    57
	};
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    58
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    59
	std::vector<ConfiguredDialect> dialects;
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
    60
33
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    61
	/**
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    62
	 * If there is a „\“ backslash at the end of a physical line, the logical line continues on the next physical line.
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    63
	 *
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    64
	 * Disabling this option makes sense only if we also disable the unescaping processors (unescape-basic, unescape-backspace).
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    65
	 * Otherwise they will complain about „Missing escape sequence“ because they got „\“ at the end of the value.
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    66
	 */
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    67
	bool allowLineContinuationsWithEscaping = true;
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    68
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    69
	/**
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    70
	 * If a line starts with a space, it is a continuation of the previous line.
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    71
	 * This rule conflicts with default ignorance of such insignificant whitespace and is quite specific to the Java MANIFEST.MF dialect.
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    72
	 */
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    73
	bool allowLineContinuationsWithSpace = false;
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
    74
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    75
	/** 
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    76
	 * By default, we ignore all leading whitespace on continuing lines.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    77
	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    78
	 * If a line break is desired, it should be written as \n (escaped) or the value should be quoted in " or '.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    79
	 * 
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    80
	 * TODO: several options:
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    81
	 *  - enabled, disabled
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    82
	 *  - if disabled, then: keep backslash, trim backslash, escape backslash
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    83
	 *    (keep requires support in some further unescaping phase, or it will cause an error)
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    84
	 *  - keep or trim the line end
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    85
	 *  - keep or trim the leading spaces
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    86
	 *  - allow comments interleaved with continuing lines (the freaky systemd syntax)
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    87
	 * 
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    88
	 * Related specifications:
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    89
	 *  - https://docs.oracle.com/javase/8/docs/api/index.html?java/util/Properties.html
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    90
	 *  - https://www.freedesktop.org/software/systemd/man/systemd.syntax.html
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    91
	 */
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    92
	bool trimLeadingSpacesOnContinuingLines = true;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    93
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    94
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    95
	/**
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    96
	 * Some dialects, or configuration files in general, do not support sections.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    97
	 * Then a line, that looks like an INI section, should be interpreted as a key
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    98
	 * (or as an error, if it does not have a proper key-value separator).
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
    99
	 */
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   100
	bool allowSections = true;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   101
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   102
	/**
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   103
	 * KDE uses some weird INI dialect that allows [section][x] syntax where „x“ is kind of „tag“ that signalizes some properties of given section.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   104
	 * Line „[section_1][$i]“ means that the „section_1“ is „locked“.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   105
	 * We may emit this information somehow later, but for now, it is just ignored.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   106
	 * 
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   107
	 * TODO: Is „section tag“ right name?
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   108
	 * 
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   109
	 * Related specifications:
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   110
	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Lock_Down
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   111
	 */
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   112
	bool allowSectionTags = true;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   113
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   114
	/**
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   115
	 * If whole key is „aaa[bbb]“ then „aaa“ is considered to be the key and „bbb“ the sub-key.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   116
	 * No \[ escaping is currently supported, so the key might not contain the bracket character.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   117
	 * 
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   118
	 * Related specifications:
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   119
	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Shell_Expansion
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   120
	 *  - https://specifications.freedesktop.org/desktop-entry-spec/latest/ar01s05.html
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   121
	 */
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   122
	bool allowSubKeys = true;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   123
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   124
	/**
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   125
	 * Classic INI uses „key=value“ syntax.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   126
	 * But some other formats/dialects might use key:value.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   127
	 * 
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   128
	 * Only single character separators are supported.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   129
	 * If multiple separators should be recognized (e.g. both „=“ and „:“), this string will contain all of them,
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   130
	 * i.e. „:=“ does not mean the „key:=value“ syntax, but either „key=value“ or „key:value“.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   131
	 */
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   132
	std::string keyValueSeparators = "=";
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   133
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   134
	/**
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   135
	 * Classic INI uses „; comment“ syntax.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   136
	 * But many existing files contain „# comment“ lines.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   137
	 * 
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   138
	 * Only single character separators are supported (works same as keyValueSeparators).
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   139
	 */
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   140
	std::string commentSeparators = ";#";
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   141
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   142
	/**
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   143
	 * INI often supports both "quotes" and 'apostrophes' styles.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   144
	 * But some dialects may support only one of them or not support quoting at all.
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   145
	 * 
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   146
	 * In such a case e.g. „key="some value"“ would mean that the value is „"some value"“ (including the quotes).
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   147
	 * Thus it is important to allow disabling quote recognizing (which is done by setting this parameter to empty string).
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   148
	 * 
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   149
	 * Only single character quotes are supported (works same as keyValueSeparators).
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   150
	 */
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   151
	std::string quotes = "\"'";
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   152
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   153
	int lineNumber = 1;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   154
	int eventNumber = 0;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   155
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   156
	/**
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   157
	 * Should be always used instead of input.peek().
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   158
	 * Skips \r.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   159
	 */
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   160
	char peek() {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   161
		// In 2020 there is no need to manually return the carriage. However some legacy systems still do it.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   162
		char ch = input.peek();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   163
		if (ch == '\r') {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   164
			input.get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   165
			ch = input.peek();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   166
		}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   167
		return ch;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   168
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   169
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   170
	/**
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   171
	 * Should be always used instead of input.get().
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   172
	 * Counts the lines and skips \r.
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   173
	 */
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   174
	char get() {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   175
		char ch = input.get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   176
		if (ch == '\n') lineNumber++;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   177
		else if (ch == '\r') ch = get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   178
		return ch;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   179
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   180
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   181
	std::string readSpacesAndTabs() {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   182
		std::stringstream result;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   183
		for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t'); ch = peek()) result.put(get());
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   184
		return result.str();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   185
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   186
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   187
	std::string readAllWhitespace() {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   188
		std::stringstream result;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   189
		for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); ch = peek()) result.put(get());
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   190
		return result.str();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   191
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   192
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   193
	void processContinuingLine(std::stringstream& result) {
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   194
		if (trimLeadingSpacesOnContinuingLines) readSpacesAndTabs();
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   195
		else result.put('\n');
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   196
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   197
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   198
	std::string readUntil(const char until, bool* found = nullptr) {
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   199
		return readUntil(std::string(1, until), found);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   200
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   201
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   202
	std::string readUntil(const std::string& until, bool* found = nullptr) {
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   203
		std::stringstream result;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   204
33
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   205
		for (char ch = peek(); input.good(); ch = peek()) {
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   206
			if (allowLineContinuationsWithSpace && ch == '\n') {
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   207
				get();
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   208
				ch = peek();
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   209
				if (ch == ' ') get();
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   210
				else if (ch == std::istream::traits_type::eof()) break;
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   211
				else {
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   212
					if (found) *found = true;
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   213
					return result.str();
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   214
				}
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   215
			} else if (oneOf(ch, until)) {
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   216
				break;
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   217
			} else if (allowLineContinuationsWithEscaping && ch == '\\') {
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   218
				get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   219
				ch = get();
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   220
				if (oneOf(ch, until) && ch == '\n') processContinuingLine(result);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   221
				else if (oneOf(ch, until)) result.put(ch);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   222
				else if (ch == std::istream::traits_type::eof()) break;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   223
				else result.put('\\').put(ch);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   224
				// unescaping is done in two phases:
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   225
				// here we unescape just the \n (LF)
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   226
				// other escape sequences are leaved untouched and will be processed in later phases, see see UnescapingINIHandler
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   227
			} else {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   228
				ch = get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   229
				result.put(ch);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   230
			}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   231
		}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   232
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   233
		if (oneOf(peek(), until)) {
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   234
			get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   235
			if (found) *found = true;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   236
		} else {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   237
			if (found) *found = false;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   238
		}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   239
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   240
		return result.str();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   241
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   242
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   243
	std::string readToken(const char until, char* quote = nullptr, bool* found = nullptr) {
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   244
		return readToken(std::string(1, until), quote, found);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   245
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   246
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   247
	std::string readToken(const std::string& until, char* quote = nullptr, bool* found = nullptr) {
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   248
		std::string result;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   249
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   250
		char ch = peek();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   251
		if (isQuote(ch)) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   252
			if (quote) *quote = ch;
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   253
			result = readUntil(std::string(1, get()), found);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   254
		} else {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   255
			if (quote) *quote = 0;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   256
			result = readUntil(until, found);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   257
		}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   258
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   259
		return result;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   260
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   261
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   262
	std::string readTokenAndEatTerminator(char until, char* quote = nullptr, bool* found = nullptr) {
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   263
		return readTokenAndEatTerminator(std::string(1, until), quote, found);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   264
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   265
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   266
	std::string readTokenAndEatTerminator(const std::string& until, char* quote = nullptr, bool* found = nullptr) {
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   267
		std::string result = readToken(until, quote, found);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   268
		if (*quote) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   269
			readAllWhitespace();
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   270
			if (!oneOf(get(), until)) throw std::logic_error(std::string("missing „") + until + "“ after quoted section name");
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   271
		}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   272
		return result;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   273
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   274
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   275
	std::string unescape(const std::string& value, UnescapingProcessor::TextType type) {
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   276
		std::string result = value;
37
d89f621951ae fix typo: enbaled → enabled
František Kučera <franta-hg@frantovo.cz>
parents: 33
diff changeset
   277
		for (ConfiguredUnescapingProcessor p : unescapingProcessors) if (p.enabled) result = p.processor->unescape(result, type);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   278
		return result;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   279
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   280
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   281
	bool isComment(char ch) {
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   282
		return oneOf(ch, commentSeparators);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   283
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   284
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   285
	bool isQuote(char ch) {
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   286
		return oneOf(ch, quotes);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   287
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   288
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   289
	/**
	 * Tests whether a character belongs to a set of characters.
	 *
	 * @param ch character to be evaluated
	 * @param options list of options (characters)
	 * @return whether ch is one of options; always false for an empty list
	 */
	bool oneOf(char ch, const std::string& options) {
		for (const char option : options) {
			if (option == ch) return true;
		}
		return false;
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   297
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   298
	/**
	 * Removes leading and trailing whitespace.
	 *
	 * @param s text to be trimmed
	 * @return copy of s without the whitespace at its beginning and at its end
	 */
	std::string trim(std::string s) {
		// compiling a regular expression is expensive, so we do it only once
		static const std::regex edgeWhitespace("^\\s+|\\s+$");
		return std::regex_replace(s, edgeWhitespace, "");
	}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   301
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   302
	/**
	 * Converts the textual values "true" and "false" to bool; nothing else is accepted.
	 * TODO: use a common method
	 *
	 * @param value text to be parsed
	 * @return true for "true", false for "false"
	 * @throws std::invalid_argument for any other value
	 */
	bool parseBoolean(const std::string& value) {
		const bool isTrue = value == "true";
		if (!isTrue && value != "false") {
			throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
		}
		return isTrue;
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   310
29
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   311
	void setDialect(const std::string& uri) {
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   312
		for (ConfiguredDialect& d : dialects) {
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   313
			if (d.uri == uri) {
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   314
				d.dialect->apply(*this);
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   315
				return;
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   316
			}
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   317
		}
29
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   318
		throw std::invalid_argument(std::string("Unsupported INI dialect: ") + uri);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   319
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   320
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   321
	bool setUnescaping(const std::string& uri, const std::string& value) {
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   322
		for (ConfiguredUnescapingProcessor& p : unescapingProcessors) {
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   323
			if (p.uri == uri) {
37
d89f621951ae fix typo: enbaled → enabled
František Kučera <franta-hg@frantovo.cz>
parents: 33
diff changeset
   324
				p.enabled = parseBoolean(value);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   325
				return true;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   326
			}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   327
		}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   328
		return false;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   329
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   330
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   331
public:
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   332
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   333
	INIReaderImpl(std::istream& input) : input(input) {
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   334
	}
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   335
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   336
	void setOption(const std::string& uri, const std::string& value) override {
33
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   337
		if (uri == option::AllowLineContinuationWithEscaping) allowLineContinuationsWithEscaping = parseBoolean(value);
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   338
		else if (uri == option::AllowLineContinuationWithSpace) allowLineContinuationsWithSpace = parseBoolean(value);
c9a158da6c32 java-manifest-mf dialect for parsing MANIFEST.MF files (import shared code from relpipe-in-ini)
František Kučera <franta-hg@frantovo.cz>
parents: 29
diff changeset
   339
		else if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value);
29
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   340
		else if (uri == option::AllowSections) allowSections = parseBoolean(value);
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   341
		else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value);
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   342
		else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value);
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   343
		else if (uri == option::CommentSeparators) commentSeparators = value;
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   344
		else if (uri == option::KeyValueSeparators) keyValueSeparators = value;
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   345
		else if (uri == option::Quotes) quotes = value;
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   346
		else if (uri == option::Dialect) setDialect(value);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   347
		else if (setUnescaping(uri, value));
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   348
		else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   349
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   350
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   351
	/**
	 * Registers a handler; it is appended after the handlers registered earlier.
	 *
	 * @param handler handler to be added to the handlers list
	 */
	void addHandler(INIContentHandler* handler) override {
		handlers.push_back(handler);
	}
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   354
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   355
	void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) override {
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   356
		unescapingProcessors.push_back({processor, uri, enabledByDefault});
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   357
	}
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   358
29
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   359
	void addDialect(std::shared_ptr<Dialect> dialect, const std::string uri, bool enabledByDefault) override {
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   360
		dialects.push_back({dialect, uri});
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   361
		if (enabledByDefault) dialect->apply(*this);
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   362
	}
06aaad12c207 configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   363
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   364
	void process() override {
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   365
		for (INIContentHandler* handler : handlers) handler->startDocument();
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   366
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   367
		bool inSection = false;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   368
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   369
		while (input.good()) { // TODO: condition
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   370
			{
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   371
				INIContentHandler::WhitespaceEvent event;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   372
				event.lineNumber = lineNumber;
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   373
				std::string whitespace = readAllWhitespace();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   374
				if (whitespace.size()) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   375
					event.eventNumber = ++eventNumber;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   376
					event.whitespace = whitespace;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   377
					for (INIContentHandler* handler : handlers) handler->whitespace(event);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   378
				}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   379
			}
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   380
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   381
			bool found;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   382
			char quote;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   383
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   384
			char ch = peek();
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   385
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   386
			if (ch == std::istream::traits_type::eof()) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   387
				break;
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   388
			} else if (ch == '[' && allowSections) {
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   389
				if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   390
				inSection = true;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   391
				INIContentHandler::SectionStartEvent event;
19
90f2b8ca32bf improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   392
				event.lineNumber = lineNumber;
90f2b8ca32bf improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   393
				event.eventNumber = ++eventNumber;
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   394
				get();
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   395
				readAllWhitespace();
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   396
				event.name = readTokenAndEatTerminator(']', &quote, &found);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   397
				if (!quote) event.name = trim(event.name);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   398
				event.name = unescape(event.name, UnescapingProcessor::TextType::SectionName);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   399
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   400
				readSpacesAndTabs();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   401
				if (allowSectionTags && peek() == '[') {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   402
					get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   403
					event.tag = readTokenAndEatTerminator(']', &quote, &found);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   404
					event.tag = unescape(event.tag, UnescapingProcessor::TextType::SectionTag);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   405
				}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   406
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   407
				readSpacesAndTabs();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   408
				ch = peek();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   409
				if (isComment(ch)) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   410
					get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   411
					readSpacesAndTabs();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   412
					event.comment = readUntil('\n', &found);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   413
					event.comment = unescape(event.comment, UnescapingProcessor::TextType::SectionComment);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   414
				} else if (ch == '\n') {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   415
					get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   416
				} else {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   417
					throw std::logic_error(std::string("unexpected content after the section: '") + event.name + "'");
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   418
				}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   419
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   420
				for (INIContentHandler* handler : handlers) handler->startSection(event);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   421
			} else if (isComment(ch)) {
19
90f2b8ca32bf improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   422
				INIContentHandler::CommentEvent event;
90f2b8ca32bf improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   423
				event.lineNumber = lineNumber;
90f2b8ca32bf improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   424
				event.eventNumber = ++eventNumber;
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   425
				get();
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   426
				readSpacesAndTabs();
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   427
				event.comment = readUntil('\n', &found);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   428
				event.comment = unescape(event.comment, UnescapingProcessor::TextType::Comment);
19
90f2b8ca32bf improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents: 16
diff changeset
   429
				for (INIContentHandler* handler : handlers) handler->comment(event);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   430
			} else {
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   431
				INIContentHandler::EntryEvent event;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   432
				event.lineNumber = lineNumber;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   433
				event.eventNumber = ++eventNumber;
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   434
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   435
				std::string fullKey = readToken(keyValueSeparators, &quote, &found);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   436
				if (!found) throw std::logic_error(std::string("missing = after key: '") + fullKey + "'");
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   437
				if (!quote) fullKey = trim(fullKey);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   438
				readSpacesAndTabs();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   439
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   440
				if (quote) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   441
					ch = get();
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   442
					if (oneOf(ch, keyValueSeparators)) readSpacesAndTabs();
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   443
					else throw std::logic_error(std::string("missing = after quoted key: '") + fullKey + "'");
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   444
				}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   445
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   446
				std::string value = readToken('\n', &quote, &found);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   447
				if (!quote) value = trim(value);
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   448
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   449
				event.key = fullKey;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   450
				event.fullKey = fullKey;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   451
				event.value = value;
25
ee70b17950bd multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   452
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   453
				if (allowSubKeys) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   454
					std::smatch match;
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   455
					if (std::regex_match(fullKey, match, std::regex("([^\\[]+)\\[([^\\[]+)\\]"))) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   456
						event.key = match[1];
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   457
						event.subKey = match[2];
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   458
						event.fullKey = fullKey;
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   459
						event.subKey = unescape(event.subKey, UnescapingProcessor::TextType::EntryKey);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   460
					}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   461
				}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   462
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   463
				event.key = unescape(event.key, UnescapingProcessor::TextType::EntryKey);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   464
				event.fullKey = unescape(event.fullKey, UnescapingProcessor::TextType::EntryKey);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   465
				event.value = unescape(event.value, UnescapingProcessor::TextType::EntryValue);
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   466
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   467
				if (quote) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   468
					readSpacesAndTabs();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   469
					ch = peek();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   470
					if (isComment(ch)) {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   471
						get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   472
						readSpacesAndTabs();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   473
						event.comment = readUntil('\n', &found);
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   474
						event.comment = unescape(event.comment, UnescapingProcessor::TextType::EntryComment);
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   475
					} else if (ch == '\n') {
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   476
						get();
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   477
					} else {
28
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   478
						// TODO: optional support for multiple tokens in a single entry?
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   479
						// modes: array, concatenate
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   480
						// some-array-1 = "item 1" "item 2" 'item 3' item 4
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   481
						// some-array-2 = "item 1" "item 2" 'item 3' item_4 item_5
0e7c57d48d1e configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   482
						// some-bash-style-string-value = "this "will' be' concatenated → this will be concatenated
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   483
						throw std::logic_error(std::string("unexpected content after the quoted value: key='") + fullKey + "' value='" + event.value + "'");
25
ee70b17950bd multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   484
					}
ee70b17950bd multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   485
				}
ee70b17950bd multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   486
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   487
				for (INIContentHandler* handler : handlers) handler->entry(event);
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   488
			}
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   489
		}
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   490
		// TODO: error at the end, catch premature/unexpected EOF
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   491
		// TODO: unescape + trim values + ignore \r
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   492
		// TODO: count lines
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   493
		if (inSection) for (INIContentHandler* handler : handlers) handler->endSection();
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   494
		for (INIContentHandler* handler : handlers) handler->endDocument();
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   495
	}
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   496
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   497
	// General features:
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   498
	// TODO: warning/error handler
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   499
	// TODO: support also escaped characters
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   500
	// TODO: support also Java .properties and manifest.mf formats?
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   501
	// TODO: support also nested sections – hierarchy
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   502
	// TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   503
	// TODO: support also option for alternative key-value separator (: instead of =)
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   504
	// TODO: support also other encodings (currently only UTF-8 is supported)
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   505
	// TODO: better exceptions
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   506
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   507
	// Lossless conversions:
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   508
	// TODO: emit also the quote style ('/"/)
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   509
	// TODO: emit also the comment style (;/#) ?
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   510
	// TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ?
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   511
	// TODO: emit also the line-end type (LF/CRLF) ?
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   512
16
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   513
};
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   514
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   515
/**
 * Factory method: allocates a new INIReaderImpl constructed from the given input stream
 * and returns it through the INIReader base pointer.
 *
 * @param input stream passed to the INIReaderImpl constructor
 * @return pointer to the newly allocated reader (created with plain `new`)
 *
 * NOTE(review): the object is returned as a raw pointer, so the caller presumably owns it
 * and is responsible for deleting it – confirm against INIReader.h and the call sites.
 */
INIReader* INIReader::create(std::istream& input) {
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   516
	return new INIReaderImpl(input);
b9a3c806468a temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   517
}
26
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   518
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   519
}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   520
}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   521
}
80e129ec3408 new INI parser
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   522
}