author | František Kučera <franta-hg@frantovo.cz> |
Sat, 21 Nov 2020 20:09:18 +0100 | |
branch | v_0 |
changeset 1 | 3876a9c56a66 |
parent 0 | 16c7fa9b7c49 |
child 2 | f031a4dc7c52 |
permissions | -rw-r--r-- |
0
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
/** |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
* Relational pipes |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
4 |
* |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
5 |
* This program is free software: you can redistribute it and/or modify |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
* it under the terms of the GNU General Public License as published by |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
7 |
* the Free Software Foundation, version 3 of the License. |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
8 |
* |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
9 |
* This program is distributed in the hope that it will be useful, |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
12 |
* GNU General Public License for more details. |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
13 |
* |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
14 |
* You should have received a copy of the GNU General Public License |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
*/ |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
17 |
|
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
18 |
#include <vector> |
1
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
19 |
#include <regex> |
0
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
20 |
|
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
21 |
#include "INIReader.h" |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
22 |
|
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
23 |
class INIReaderImpl : public INIReader { |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
24 |
private: |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
25 |
std::istream& input; |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
26 |
std::vector<INIContentHandler*> handlers; |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
27 |
public: |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
28 |
|
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
29 |
INIReaderImpl(std::istream& input) : input(input) { |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
30 |
} |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
31 |
|
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
32 |
void addHandler(INIContentHandler* handler) override { |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
33 |
handlers.push_back(handler); |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
34 |
} |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
35 |
|
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
36 |
void process() override { |
1
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
37 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
38 |
for (INIContentHandler* handler : handlers) handler->startDocument(); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
39 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
40 |
std::regex whitespacePattrern("\\s*"); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
41 |
std::regex commentPattrern("\\s*(;|#)\\s*(.*)"); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
42 |
std::regex sectionPattrern("\\s*\\[\\s*([^\\]]+)\\s*\\]\\s*"); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
43 |
std::regex entryQuotesPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*\"([^']+)\"\\s*((;|#)\\s*(.*))?"); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
44 |
std::regex entryApostrophesPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*'([^']+)'\\s*((;|#)\\s*(.*))?"); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
45 |
std::regex entryPlainPattrern("\\s*([^=\\]]+?[^=\\s\\]]*)(\\[([^\\]]+)\\])?\\s*=\\s*(.*)"); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
46 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
47 |
std::smatch match; |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
48 |
std::string section; |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
49 |
std::string line; |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
50 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
51 |
while (std::getline(input, line)) { |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
52 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
53 |
if (std::regex_match(line, match, whitespacePattrern)) { |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
54 |
// TODO: support also whitespace |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
55 |
} else if (std::regex_match(line, match, commentPattrern)) { |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
56 |
// TODO: support also comments + emit also the comment style (;/#) |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
57 |
} else if (std::regex_match(line, match, sectionPattrern)) { |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
58 |
if (section.size()) for (INIContentHandler* handler : handlers) handler->endSection(); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
59 |
section = match[1]; |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
60 |
for (INIContentHandler* handler : handlers) handler->startSection(section); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
61 |
} else if (std::regex_match(line, match, entryQuotesPattrern) || std::regex_match(line, match, entryApostrophesPattrern)) { |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
62 |
// TODO: support also comments + emit also the comment style (;/#) |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
63 |
// TODO: emit also the quote style ('/"/) and surrounding whitespace |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
64 |
for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
65 |
} else if (std::regex_match(line, match, entryPlainPattrern)) { |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
66 |
for (INIContentHandler* handler : handlers) handler->entry(match[1], match[3], match[4]); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
67 |
} else { |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
68 |
// TODO: warning, error, or support unknown content |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
69 |
} |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
70 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
71 |
// TODO: probably switch to state-machine approach instead of regular expressions |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
72 |
// TODO: warning/error handler |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
73 |
// TODO: support also multiline content (\ + \n) |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
74 |
// TODO: support also quoted or multiline keys? |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
75 |
// TODO: support also escaped characters |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
76 |
// TODO: support also Java .properties and manifest.mf formats? |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
77 |
// TODO: support also nested sections – hierarchy |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
78 |
// TODO: support also option for alternative key-value separator (: instead of =) |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
79 |
// TODO: support also other encodings (currently only UTF-8 is supported) |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
80 |
// TODO: emit line numbers and/or event order? |
0
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
81 |
} |
1
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
82 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
83 |
if (section.size()) for (INIContentHandler* handler : handlers) handler->endSection(); |
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
84 |
|
3876a9c56a66
simple INI parser based on regular expressions
František Kučera <franta-hg@frantovo.cz>
parents:
0
diff
changeset
|
85 |
for (INIContentHandler* handler : handlers) handler->endDocument(); |
0
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
86 |
} |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
87 |
}; |
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
88 |
|
16c7fa9b7c49
project and parser skeleton + output demo data
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
89 |
/**
 * Factory method: creates the regular-expression based reader for the given stream.
 *
 * @param input stream the reader will parse; passed on to the implementation's constructor
 * @return a reader allocated with new; nothing in this file deletes it, so the caller
 *         is presumably responsible for that (confirm against INIReader.h)
 */
INIReader* INIReader::create(std::istream& input) {
	INIReader* reader = new INIReaderImpl(input);
	return reader;
}