author | František Kučera <franta-hg@frantovo.cz> |
Sun, 31 Oct 2021 17:30:40 +0100 | |
branch | v_0 |
changeset 33 | c9a158da6c32 |
parent 28 | 0e7c57d48d1e |
permissions | -rw-r--r-- |
27
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
/** |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
* Relational pipes |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
4 |
* |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
5 |
* This program is free software: you can redistribute it and/or modify |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
* it under the terms of the GNU General Public License as published by |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
7 |
* the Free Software Foundation, version 3 of the License. |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
8 |
* |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
9 |
* This program is distributed in the hope that it will be useful, |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
12 |
* GNU General Public License for more details. |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
13 |
* |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
14 |
* You should have received a copy of the GNU General Public License |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
*/ |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
17 |
#pragma once |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
18 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
19 |
#include <sstream> |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
20 |
#include <codecvt> |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
21 |
#include <arpa/inet.h> |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
22 |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
23 |
#include "UnescapingProcessor.h" |
27
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
24 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
25 |
using namespace std; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
26 |
using namespace relpipe::writer; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
27 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
28 |
namespace relpipe { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
29 |
namespace in { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
30 |
namespace ini { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
31 |
namespace lib { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
32 |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
33 |
/** |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
34 |
* Should work according to <https://docs.oracle.com/javase/specs/jls/se15/html/jls-3.html#jls-3.3> 3.3. Unicode Escapes |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
35 |
*/ |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
36 |
class JavaPropertiesUnescapingProcessor : public UnescapingProcessor { |
27
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
37 |
private: |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
38 |
wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8 |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
39 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
40 |
bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
41 |
if (hexLength != binLength * 2) return false; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
42 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
43 |
for (size_t i = 0; i < binLength; i++) { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
44 |
uint8_t value = 0; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
45 |
char a = hexadecimal[i * 2]; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
46 |
char b = hexadecimal[i * 2 + 1]; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
47 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
48 |
if (a >= '0' && a <= '9') value += (a - '0')*16; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
49 |
else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
50 |
else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
51 |
else return false; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
52 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
53 |
if (b >= '0' && b <= '9') value += b - '0'; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
54 |
else if (b >= 'a' && b <= 'f') value += b - 'a' + 10; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
55 |
else if (b >= 'A' && b <= 'F') value += b - 'A' + 10; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
56 |
else return false; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
57 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
58 |
if (resultBuffer) resultBuffer[i] = value; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
59 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
60 |
return true; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
61 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
62 |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
63 |
public: |
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
64 |
|
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
65 |
std::string unescape(const std::string& s, const TextType type) override { |
27
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
66 |
std::stringstream result; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
67 |
for (int i = 0, length = s.size(); i < length; i++) { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
68 |
char ch = s[i]; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
69 |
if (i + 1 < length && ch == ESC) { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
70 |
ch = s[i + 1]; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
71 |
if (ch == 'u') { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
72 |
// TODO: simplify, clean-up, verify (but seems working) |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
73 |
i++; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
74 |
int hexLength = 4; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
75 |
if (i + hexLength < length) { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
76 |
uint16_t u16; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
77 |
bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16)); |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
78 |
if (hexOK) result << convertor.to_bytes(ntohs(u16)); |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
79 |
else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX")); |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
80 |
i += hexLength; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
81 |
} else { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
82 |
throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters")); |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
83 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
84 |
|
28
0e7c57d48d1e
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
27
diff
changeset
|
85 |
} else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle |
27
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
86 |
else result.put(ESC); // keep the escape sequence for later unescaping phase |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
87 |
} else if (ch == ESC) { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
88 |
throw std::logic_error(std::string("Missing escape sequence")); // this should not happen |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
89 |
} else { |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
90 |
result.put(ch); |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
91 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
92 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
93 |
return result.str(); |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
94 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
95 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
96 |
}; |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
97 |
|
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
98 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
99 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
100 |
} |
fd669e73d39a
unescape also Java .properties encoding (\uXXXX)
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
101 |
} |