author | František Kučera <franta-hg@frantovo.cz> |
Sun, 29 Nov 2020 10:49:33 +0100 | |
branch | v_0 |
changeset 31 | c6527b45fbc2 |
parent 29 | 06aaad12c207 |
child 33 | c9a158da6c32 |
permissions | -rw-r--r-- |
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
/** |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
* Relational pipes |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
4 |
* |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
5 |
* This program is free software: you can redistribute it and/or modify |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
* it under the terms of the GNU General Public License as published by |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
7 |
* the Free Software Foundation, version 3 of the License. |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
8 |
* |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
9 |
* This program is distributed in the hope that it will be useful, |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
12 |
* GNU General Public License for more details. |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
13 |
* |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
14 |
* You should have received a copy of the GNU General Public License |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
*/ |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
17 |
|
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
18 |
#include <vector> |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
19 |
#include <regex> |
26 | 20 |
#include <sstream> |
21 |
#include <stdexcept> |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
22 |
|
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
23 |
#include "INIReader.h" |
29
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
24 |
#include "uri.h" |
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
25 |
|
26 | 26 |
namespace relpipe { |
27 |
namespace in { |
|
28 |
namespace ini { |
|
29 |
namespace lib { |
|
30 |
||
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
31 |
class INIReaderImpl : public INIReader { |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
32 |
private: |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
33 |
std::istream& input; |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
34 |
std::vector<INIContentHandler*> handlers; |
26 | 35 |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
36 |
class ConfiguredUnescapingProcessor { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
37 |
public: |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
38 |
std::shared_ptr<UnescapingProcessor> processor; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
39 |
const std::string uri; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
40 |
bool enbaled; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
41 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
42 |
ConfiguredUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enbaled) : processor(processor), uri(uri), enbaled(enbaled) { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
43 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
44 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
45 |
}; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
46 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
47 |
std::vector<ConfiguredUnescapingProcessor> unescapingProcessors; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
48 |
|
29
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
49 |
class ConfiguredDialect { |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
50 |
public: |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
51 |
std::shared_ptr<Dialect> dialect; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
52 |
const std::string uri; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
53 |
|
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
54 |
ConfiguredDialect(std::shared_ptr<Dialect> dialect, const std::string uri) : dialect(dialect), uri(uri) { |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
55 |
} |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
56 |
|
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
57 |
}; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
58 |
|
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
59 |
std::vector<ConfiguredDialect> dialects; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
60 |
|
26 | 61 |
/** |
62 |
* By default, we ignore all leading whitespace on continuing lines. |
|
63 |
* If there should be some spaces or tabs, they should be placed on the previous line before the „\“. |
|
64 |
* If a line break is desired, it should be written as \n (escaped) or the value should be quoted in " or '. |
|
65 |
* |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
66 |
* TODO: several options: |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
67 |
* - enabled, disabled |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
68 |
* - if disabled, then: keep backslash, trim backslash, escape backslash |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
69 |
* (keep requires support in some further unescaping phase, or it will cause an error) |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
70 |
* - keep or trim the line end |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
71 |
* - keep or trim the leading spaces |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
72 |
* - allow comments interleaved with continuing lines (the freaky systemd syntax) |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
73 |
* |
26 | 74 |
* Related specifications: |
75 |
* - https://docs.oracle.com/javase/8/docs/api/index.html?java/util/Properties.html |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
76 |
* - https://www.freedesktop.org/software/systemd/man/systemd.syntax.html |
26 | 77 |
*/ |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
78 |
bool trimLeadingSpacesOnContinuingLines = true; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
79 |
|
26 | 80 |
|
81 |
/** |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
82 |
* Some dialects or configuration files in general do not support sections. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
83 |
* Then a line, that looks like an INI section, should be interpreted as a key |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
84 |
* (or as an error, if it does not have a proper key-value separator). |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
85 |
*/ |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
86 |
bool allowSections = true; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
87 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
88 |
/** |
26 | 89 |
* KDE uses some weird INI dialect that allows [section][x] syntax where „x“ is kind of „tag“ that signalizes some properties of given section. |
90 |
* Line „[section_1][$i]“ means that the „section_1“ is „locked“. |
|
91 |
* We may emit this information somehow later, but for now, it is just ignored. |
|
92 |
* |
|
93 |
* TODO: Is „section tag“ right name? |
|
94 |
* |
|
95 |
* Related specifications: |
|
96 |
* - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Lock_Down |
|
97 |
*/ |
|
98 |
bool allowSectionTags = true; |
|
99 |
||
100 |
/** |
|
101 |
* If whole key is „aaa[bbb]“ then „aaa“ is considered to be the key and „bbb“ the sub-key. |
|
102 |
* No \[ escaping is currently supported, so the key might not contain the bracket character. |
|
103 |
* |
|
104 |
* Related specifications: |
|
105 |
* - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Shell_Expansion |
|
106 |
* - https://specifications.freedesktop.org/desktop-entry-spec/latest/ar01s05.html |
|
107 |
*/ |
|
108 |
bool allowSubKeys = true; |
|
109 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
110 |
/** |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
111 |
* Classic INI uses „key=value“ syntax. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
112 |
* But some other formats/dialects might use key:value. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
113 |
* |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
114 |
* Only single character separators are supported. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
115 |
* If multiple separators should be recognized (e.g. both „=“ and „:“), this string will contain all of them, |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
116 |
* i.e. „:=“ does not mean the „key:=value“ syntax, but „key=value“ or „key:value“. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
117 |
*/ |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
118 |
std::string keyValueSeparators = "="; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
119 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
120 |
/** |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
121 |
* Classic INI uses „; comment“ syntax. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
122 |
* But many existing files contain „# comment“ lines. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
123 |
* |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
124 |
* Only single character separators are supported (works same as keyValueSeparators). |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
125 |
*/ |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
126 |
std::string commentSeparators = ";#"; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
127 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
128 |
/** |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
129 |
* INI files often support both "quotes" and 'apostrophes' styles. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
130 |
* But some dialects may support only one of them or not support quoting at all. |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
131 |
* |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
132 |
* In such case e.g. „key="some value"“ would mean that the value is „"some value"“ (including the quotes). |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
133 |
* Thus it is important to allow disabling quote recognizing (which is done by setting this parameter to empty string). |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
134 |
* |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
135 |
* Only single character quotes are supported (works same as keyValueSeparators). |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
136 |
*/ |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
137 |
std::string quotes = "\"'"; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
138 |
|
26 | 139 |
int lineNumber = 1; |
140 |
int eventNumber = 0; |
|
141 |
||
142 |
/** |
|
143 |
* Should be always used instead of input.peek(). |
|
144 |
* Skips \r. |
|
145 |
*/ |
|
146 |
char peek() { |
|
147 |
// In 2020 there is no need to manually return the carriage. However some legacy systems still do it. |
|
148 |
char ch = input.peek(); |
|
149 |
if (ch == '\r') { |
|
150 |
input.get(); |
|
151 |
ch = input.peek(); |
|
152 |
} |
|
153 |
return ch; |
|
154 |
} |
|
155 |
||
156 |
/** |
|
157 |
* Should be always used instead of input.get(). |
|
158 |
* Counts the lines and skips \r. |
|
159 |
*/ |
|
160 |
char get() { |
|
161 |
char ch = input.get(); |
|
162 |
if (ch == '\n') lineNumber++; |
|
163 |
else if (ch == '\r') ch = get(); |
|
164 |
return ch; |
|
165 |
} |
|
166 |
||
167 |
std::string readSpacesAndTabs() { |
|
168 |
std::stringstream result; |
|
169 |
for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t'); ch = peek()) result.put(get()); |
|
170 |
return result.str(); |
|
171 |
} |
|
172 |
||
173 |
std::string readAllWhitespace() { |
|
174 |
std::stringstream result; |
|
175 |
for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); ch = peek()) result.put(get()); |
|
176 |
return result.str(); |
|
177 |
} |
|
178 |
||
179 |
void processContinuingLine(std::stringstream& result) { |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
180 |
if (trimLeadingSpacesOnContinuingLines) readSpacesAndTabs(); |
26 | 181 |
else result.put('\n'); |
182 |
} |
|
183 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
184 |
std::string readUntil(const char until, bool* found = nullptr) { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
185 |
return readUntil(std::string(1, until), found); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
186 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
187 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
188 |
std::string readUntil(const std::string& until, bool* found = nullptr) { |
26 | 189 |
std::stringstream result; |
190 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
191 |
for (char ch = peek(); input.good() && !oneOf(ch, until); ch = peek()) { |
26 | 192 |
if (ch == '\\') { |
193 |
get(); |
|
194 |
ch = get(); |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
195 |
if (oneOf(ch, until) && ch == '\n') processContinuingLine(result); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
196 |
else if (oneOf(ch, until)) result.put(ch); |
26 | 197 |
else if (ch == std::istream::traits_type::eof()) break; |
198 |
else result.put('\\').put(ch); |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
199 |
// unescaping is done in two phases: |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
200 |
// here we unescape just the \n (LF) |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
201 |
// other escape sequences are leaved untouched and will be processed in later phases, see see UnescapingINIHandler |
26 | 202 |
} else { |
203 |
ch = get(); |
|
204 |
result.put(ch); |
|
205 |
} |
|
206 |
} |
|
207 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
208 |
if (oneOf(peek(), until)) { |
26 | 209 |
get(); |
210 |
if (found) *found = true; |
|
211 |
} else { |
|
212 |
if (found) *found = false; |
|
213 |
} |
|
214 |
||
215 |
return result.str(); |
|
216 |
} |
|
217 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
218 |
std::string readToken(const char until, char* quote = nullptr, bool* found = nullptr) { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
219 |
return readToken(std::string(1, until), quote, found); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
220 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
221 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
222 |
std::string readToken(const std::string& until, char* quote = nullptr, bool* found = nullptr) { |
26 | 223 |
std::string result; |
224 |
||
225 |
char ch = peek(); |
|
226 |
if (isQuote(ch)) { |
|
227 |
if (quote) *quote = ch; |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
228 |
result = readUntil(std::string(1, get()), found); |
26 | 229 |
} else { |
230 |
if (quote) *quote = 0; |
|
231 |
result = readUntil(until, found); |
|
232 |
} |
|
233 |
||
234 |
return result; |
|
235 |
} |
|
236 |
||
237 |
std::string readTokenAndEatTerminator(char until, char* quote = nullptr, bool* found = nullptr) { |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
238 |
return readTokenAndEatTerminator(std::string(1, until), quote, found); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
239 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
240 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
241 |
std::string readTokenAndEatTerminator(const std::string& until, char* quote = nullptr, bool* found = nullptr) { |
26 | 242 |
std::string result = readToken(until, quote, found); |
243 |
if (*quote) { |
|
244 |
readAllWhitespace(); |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
245 |
if (!oneOf(get(), until)) throw std::logic_error(std::string("missing „") + until + "“ after quoted section name"); |
26 | 246 |
} |
247 |
return result; |
|
248 |
} |
|
249 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
250 |
std::string unescape(const std::string& value, UnescapingProcessor::TextType type) { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
251 |
std::string result = value; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
252 |
for (ConfiguredUnescapingProcessor p : unescapingProcessors) if (p.enbaled) result = p.processor->unescape(result, type); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
253 |
return result; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
254 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
255 |
|
26 | 256 |
bool isComment(char ch) { |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
257 |
return oneOf(ch, commentSeparators); |
26 | 258 |
} |
259 |
||
260 |
bool isQuote(char ch) { |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
261 |
return oneOf(ch, quotes); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
262 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
263 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
264 |
/**
 * Tests whether a character belongs to a set of characters.
 *
 * @param ch character to be evaluated
 * @param options the set, written as a string (each character is one option)
 * @return true if ch occurs in options, false otherwise (always false for an empty set)
 */
bool oneOf(char ch, const std::string& options) {
	for (const char candidate : options) {
		if (candidate == ch) return true;
	}
	return false;
}
272 |
||
273 |
/**
 * Removes leading and trailing whitespace.
 *
 * @param s text to be trimmed
 * @return copy of s without the whitespace at its beginning and at its end
 */
std::string trim(std::string s) {
	// Compiling a std::regex is expensive, so it is done once instead of on every call.
	static const std::regex surroundingWhitespace("^\\s+|\\s+$");
	return std::regex_replace(s, surroundingWhitespace, "");
}
|
276 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
277 |
/**
 * Converts the text „true“ or „false“ to a boolean.
 *
 * TODO: use a common method
 *
 * @param value text to be parsed
 * @return true for „true“, false for „false“
 * @throws std::invalid_argument for any other text
 */
bool parseBoolean(const std::string& value) {
	const bool isTrue = value == "true";
	if (!isTrue && value != "false") {
		throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
	}
	return isTrue;
}
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
285 |
|
29
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
286 |
void setDialect(const std::string& uri) { |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
287 |
for (ConfiguredDialect& d : dialects) { |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
288 |
if (d.uri == uri) { |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
289 |
d.dialect->apply(*this); |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
290 |
return; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
291 |
} |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
292 |
} |
29
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
293 |
throw std::invalid_argument(std::string("Unsupported INI dialect: ") + uri); |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
294 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
295 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
296 |
bool setUnescaping(const std::string& uri, const std::string& value) { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
297 |
for (ConfiguredUnescapingProcessor& p : unescapingProcessors) { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
298 |
if (p.uri == uri) { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
299 |
p.enbaled = parseBoolean(value); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
300 |
return true; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
301 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
302 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
303 |
return false; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
304 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
305 |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
306 |
public: |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
307 |
|
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
308 |
INIReaderImpl(std::istream& input) : input(input) { |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
309 |
} |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
310 |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
311 |
void setOption(const std::string& uri, const std::string& value) override { |
29
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
312 |
if (uri == option::TrimContinuingLines) trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
313 |
else if (uri == option::AllowSections) allowSections = parseBoolean(value); |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
314 |
else if (uri == option::AllowSectionTags) allowSectionTags = parseBoolean(value); |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
315 |
else if (uri == option::AllowSubKeys) allowSubKeys = parseBoolean(value); |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
316 |
else if (uri == option::CommentSeparators) commentSeparators = value; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
317 |
else if (uri == option::KeyValueSeparators) keyValueSeparators = value; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
318 |
else if (uri == option::Quotes) quotes = value; |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
319 |
else if (uri == option::Dialect) setDialect(value); |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
320 |
else if (setUnescaping(uri, value)); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
321 |
else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“"); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
322 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
323 |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
324 |
/**
 * Registers a handler that will receive the events emitted by process().
 * The pointer is stored as is and dereferenced during process(); nothing here takes ownership of it.
 *
 * @param handler receiver of the parser events; appended after the handlers registered earlier
 */
void addHandler(INIContentHandler* handler) override {
	handlers.insert(handlers.end(), handler);
}
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
327 |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
328 |
void addUnescapingProcessor(std::shared_ptr<UnescapingProcessor> processor, const std::string uri, bool enabledByDefault) override { |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
329 |
unescapingProcessors.push_back({processor, uri, enabledByDefault}); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
330 |
} |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
331 |
|
29
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
332 |
void addDialect(std::shared_ptr<Dialect> dialect, const std::string uri, bool enabledByDefault) override { |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
333 |
dialects.push_back({dialect, uri}); |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
334 |
if (enabledByDefault) dialect->apply(*this); |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
335 |
} |
06aaad12c207
configurable dialects: in separate classes
František Kučera <franta-hg@frantovo.cz>
parents:
28
diff
changeset
|
336 |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
337 |
void process() override { |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
338 |
for (INIContentHandler* handler : handlers) handler->startDocument(); |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
339 |
|
26 | 340 |
bool inSection = false; |
341 |
||
342 |
while (input.good()) { // TODO: condition |
|
343 |
{ |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
344 |
INIContentHandler::WhitespaceEvent event; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
345 |
event.lineNumber = lineNumber; |
26 | 346 |
std::string whitespace = readAllWhitespace(); |
347 |
if (whitespace.size()) { |
|
348 |
event.eventNumber = ++eventNumber; |
|
349 |
event.whitespace = whitespace; |
|
350 |
for (INIContentHandler* handler : handlers) handler->whitespace(event); |
|
351 |
} |
|
352 |
} |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
353 |
|
26 | 354 |
bool found; |
355 |
char quote; |
|
356 |
||
357 |
char ch = peek(); |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
358 |
|
26 | 359 |
if (ch == std::istream::traits_type::eof()) { |
360 |
break; |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
361 |
} else if (ch == '[' && allowSections) { |
26 | 362 |
if (inSection) for (INIContentHandler* handler : handlers) handler->endSection(); |
363 |
inSection = true; |
|
364 |
INIContentHandler::SectionStartEvent event; |
|
19
90f2b8ca32bf
improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
365 |
event.lineNumber = lineNumber; |
90f2b8ca32bf
improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
366 |
event.eventNumber = ++eventNumber; |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
367 |
get(); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
368 |
readAllWhitespace(); |
26 | 369 |
event.name = readTokenAndEatTerminator(']', "e, &found); |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
370 |
if (!quote) event.name = trim(event.name); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
371 |
event.name = unescape(event.name, UnescapingProcessor::TextType::SectionName); |
26 | 372 |
|
373 |
readSpacesAndTabs(); |
|
374 |
if (allowSectionTags && peek() == '[') { |
|
375 |
get(); |
|
376 |
event.tag = readTokenAndEatTerminator(']', "e, &found); |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
377 |
event.tag = unescape(event.tag, UnescapingProcessor::TextType::SectionTag); |
26 | 378 |
} |
379 |
||
380 |
readSpacesAndTabs(); |
|
381 |
ch = peek(); |
|
382 |
if (isComment(ch)) { |
|
383 |
get(); |
|
384 |
readSpacesAndTabs(); |
|
385 |
event.comment = readUntil('\n', &found); |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
386 |
event.comment = unescape(event.comment, UnescapingProcessor::TextType::SectionComment); |
26 | 387 |
} else if (ch == '\n') { |
388 |
get(); |
|
389 |
} else { |
|
390 |
throw std::logic_error(std::string("unexpected content after the section: '") + event.name + "'"); |
|
391 |
} |
|
392 |
||
393 |
for (INIContentHandler* handler : handlers) handler->startSection(event); |
|
394 |
} else if (isComment(ch)) { |
|
19
90f2b8ca32bf
improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
395 |
INIContentHandler::CommentEvent event; |
90f2b8ca32bf
improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
396 |
event.lineNumber = lineNumber; |
90f2b8ca32bf
improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
397 |
event.eventNumber = ++eventNumber; |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
398 |
get(); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
399 |
readSpacesAndTabs(); |
26 | 400 |
event.comment = readUntil('\n', &found); |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
401 |
event.comment = unescape(event.comment, UnescapingProcessor::TextType::Comment); |
19
90f2b8ca32bf
improved support for comments and whitespace
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
402 |
for (INIContentHandler* handler : handlers) handler->comment(event); |
26 | 403 |
} else { |
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
404 |
INIContentHandler::EntryEvent event; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
405 |
event.lineNumber = lineNumber; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
406 |
event.eventNumber = ++eventNumber; |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
407 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
408 |
std::string fullKey = readToken(keyValueSeparators, "e, &found); |
26 | 409 |
if (!found) throw std::logic_error(std::string("missing = after key: '") + fullKey + "'"); |
410 |
if (!quote) fullKey = trim(fullKey); |
|
411 |
readSpacesAndTabs(); |
|
412 |
||
413 |
if (quote) { |
|
414 |
ch = get(); |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
415 |
if (oneOf(ch, keyValueSeparators)) readSpacesAndTabs(); |
26 | 416 |
else throw std::logic_error(std::string("missing = after quoted key: '") + fullKey + "'"); |
417 |
} |
|
418 |
||
419 |
std::string value = readToken('\n', "e, &found); |
|
420 |
if (!quote) value = trim(value); |
|
421 |
||
422 |
event.key = fullKey; |
|
423 |
event.fullKey = fullKey; |
|
424 |
event.value = value; |
|
25
ee70b17950bd
multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents:
24
diff
changeset
|
425 |
|
26 | 426 |
if (allowSubKeys) { |
427 |
std::smatch match; |
|
428 |
if (std::regex_match(fullKey, match, std::regex("([^\\[]+)\\[([^\\[]+)\\]"))) { |
|
429 |
event.key = match[1]; |
|
430 |
event.subKey = match[2]; |
|
431 |
event.fullKey = fullKey; |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
432 |
event.subKey = unescape(event.subKey, UnescapingProcessor::TextType::EntryKey); |
26 | 433 |
} |
434 |
} |
|
435 |
||
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
436 |
event.key = unescape(event.key, UnescapingProcessor::TextType::EntryKey); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
437 |
event.fullKey = unescape(event.fullKey, UnescapingProcessor::TextType::EntryKey); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
438 |
event.value = unescape(event.value, UnescapingProcessor::TextType::EntryValue); |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
439 |
|
26 | 440 |
if (quote) { |
441 |
readSpacesAndTabs(); |
|
442 |
ch = peek(); |
|
443 |
if (isComment(ch)) { |
|
444 |
get(); |
|
445 |
readSpacesAndTabs(); |
|
446 |
event.comment = readUntil('\n', &found); |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
447 |
event.comment = unescape(event.comment, UnescapingProcessor::TextType::EntryComment); |
26 | 448 |
} else if (ch == '\n') { |
449 |
get(); |
|
450 |
} else { |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
451 |
// TODO: optional support for multiple tokens in a single entry? |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
452 |
// modes: array, concatenate |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
453 |
// some-array-1 = "item 1" "item 2" 'item 3' item 4 |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
454 |
// some-array-2 = "item 1" "item 2" 'item 3' item_4 item_5 |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
26
diff
changeset
|
455 |
// some-bash-style-string-value = "this "will' be' concatenated → this will be concatenated |
26 | 456 |
throw std::logic_error(std::string("unexpected content after the quoted value: key='") + fullKey + "' value='" + event.value + "'"); |
25
ee70b17950bd
multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents:
24
diff
changeset
|
457 |
} |
ee70b17950bd
multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents:
24
diff
changeset
|
458 |
} |
ee70b17950bd
multi-line support: quoted and apostrophed
František Kučera <franta-hg@frantovo.cz>
parents:
24
diff
changeset
|
459 |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
460 |
for (INIContentHandler* handler : handlers) handler->entry(event); |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
461 |
} |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
462 |
} |
26 | 463 |
// TODO: error at the end, catch premature/unexpected EOF |
464 |
// TODO: unescape + trim values + ignore \r |
|
465 |
// TODO: count lines |
|
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
466 |
if (inSection) for (INIContentHandler* handler : handlers) handler->endSection(); |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
467 |
for (INIContentHandler* handler : handlers) handler->endDocument(); |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
468 |
} |
26 | 469 |
|
470 |
// General features: |
|
471 |
// TODO: warning/error handler |
|
472 |
// TODO: support also escaped characters |
|
473 |
// TODO: support also Java .properties and manifest.mf formats? |
|
474 |
// TODO: support also nested sections – hierarchy |
|
475 |
// TODO: support also nested keys e.g. key.sub.subsub.subsubsub=value – translate them to nested sections |
|
476 |
// TODO: support also option for alternative key-value separator (: instead of =) |
|
477 |
// TODO: support also other encodings (currently only UTF-8 is supported) |
|
478 |
// TODO: better exceptions |
|
479 |
||
480 |
// Lossless conversions: |
|
481 |
// TODO: emit also the quote style ('/"/) |
|
482 |
// TODO: emit also the comment style (;/#) ? |
|
483 |
// TODO: emit also the whitespace before key name, around =, after "values"/'values', around [sections] ? |
|
484 |
// TODO: emit also the line-end type (LF/CRLF) ? |
|
485 |
||
16
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
486 |
}; |
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
487 |
|
b9a3c806468a
temporary copy INIReader.h, INIReader.cpp, INIContentHandler.h from relpipe-in-ini + XMLNameCodec.h from relpipe-in-yamltable (will be moved to alt2xml and shared)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
488 |
/**
 * Factory method: creates the default INIReader implementation for the given stream.
 *
 * @param input stream with the INI content to be parsed
 * @return reader allocated with new and returned as a raw pointer; nothing in this function releases it
 */
INIReader* INIReader::create(std::istream& input) {
	INIReader* const reader = new INIReaderImpl(input);
	return reader;
}
26 | 491 |
|
492 |
} |
|
493 |
} |
|
494 |
} |
|
495 |
} |