author | František Kučera <franta-hg@frantovo.cz> |
Sat, 28 Nov 2020 18:10:47 +0100 | |
branch | v_0 |
changeset 27 | e9aad9dd823a |
parent 18 | src/lib/JavaPropertiesUnescapingINIHandler.h@a8c1381ef103 |
permissions | -rw-r--r-- |
16 | 1 |
/** |
2 |
* Relational pipes |
|
3 |
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 |
* |
|
5 |
* This program is free software: you can redistribute it and/or modify |
|
6 |
* it under the terms of the GNU General Public License as published by |
|
7 |
* the Free Software Foundation, version 3 of the License. |
|
8 |
* |
|
9 |
* This program is distributed in the hope that it will be useful, |
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
* GNU General Public License for more details. |
|
13 |
* |
|
14 |
* You should have received a copy of the GNU General Public License |
|
15 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 |
*/ |
|
17 |
#pragma once |
|
18 |
||
19 |
#include <sstream> |
|
17
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
20 |
#include <codecvt> |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
21 |
#include <arpa/inet.h> |
16 | 22 |
|
27
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
23 |
#include "UnescapingProcessor.h" |
16 | 24 |
|
25 |
using namespace std; |
|
26 |
using namespace relpipe::writer; |
|
27 |
||
28 |
namespace relpipe { |
|
29 |
namespace in { |
|
30 |
namespace ini { |
|
31 |
namespace lib { |
|
32 |
||
27
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
33 |
/** |
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
34 |
* Should work according to <https://docs.oracle.com/javase/specs/jls/se15/html/jls-3.html#jls-3.3> 3.3. Unicode Escapes |
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
35 |
*/ |
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
36 |
class JavaPropertiesUnescapingProcessor : public UnescapingProcessor { |
17
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
37 |
private: |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
38 |
wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8 |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
39 |
|
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
40 |
bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) { |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
41 |
if (hexLength != binLength * 2) return false; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
42 |
|
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
43 |
for (size_t i = 0; i < binLength; i++) { |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
44 |
uint8_t value = 0; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
45 |
char a = hexadecimal[i * 2]; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
46 |
char b = hexadecimal[i * 2 + 1]; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
47 |
|
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
48 |
if (a >= '0' && a <= '9') value += (a - '0')*16; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
49 |
else if (a >= 'a' && a <= 'f') value += (a - 'a' + 10)*16; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
50 |
else if (a >= 'A' && a <= 'F') value += (a - 'A' + 10)*16; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
51 |
else return false; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
52 |
|
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
53 |
if (b >= '0' && b <= '9') value += b - '0'; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
54 |
else if (b >= 'a' && b <= 'f') value += b - 'a' + 10; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
55 |
else if (b >= 'A' && b <= 'F') value += b - 'A' + 10; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
56 |
else return false; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
57 |
|
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
58 |
if (resultBuffer) resultBuffer[i] = value; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
59 |
} |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
60 |
return true; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
61 |
} |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
62 |
|
27
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
63 |
public: |
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
64 |
|
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
65 |
std::string unescape(const std::string& s, const TextType type) override { |
16 | 66 |
std::stringstream result; |
67 |
for (int i = 0, length = s.size(); i < length; i++) { |
|
68 |
char ch = s[i]; |
|
69 |
if (i + 1 < length && ch == ESC) { |
|
70 |
ch = s[i + 1]; |
|
17
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
71 |
if (ch == 'u') { |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
72 |
// TODO: simplify, clean-up, verify (but seems working) |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
73 |
i++; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
74 |
int hexLength = 4; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
75 |
if (i + hexLength < length) { |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
76 |
uint16_t u16; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
77 |
bool hexOK = readHex(s.c_str() + i + 1, hexLength, (uint8_t*) & u16, sizeof (u16)); |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
78 |
if (hexOK) result << convertor.to_bytes(ntohs(u16)); |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
79 |
else throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX")); |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
80 |
i += hexLength; |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
81 |
} else { |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
82 |
throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters")); |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
83 |
} |
4b1612d20cb2
unescape also Java .properties encoding (\uXXXX): first version
František Kučera <franta-hg@frantovo.cz>
parents:
16
diff
changeset
|
84 |
|
27
e9aad9dd823a
configurable unescaping processors
František Kučera <franta-hg@frantovo.cz>
parents:
18
diff
changeset
|
85 |
} else if (ch == ESC) put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle |
16 | 86 |
else result.put(ESC); // keep the escape sequence for later unescaping phase |
87 |
} else if (ch == ESC) { |
|
88 |
throw std::logic_error(std::string("Missing escape sequence")); // this should not happen |
|
89 |
} else { |
|
90 |
result.put(ch); |
|
91 |
} |
|
92 |
} |
|
93 |
return result.str(); |
|
94 |
} |
|
95 |
||
96 |
}; |
|
97 |
||
98 |
} |
|
99 |
} |
|
100 |
} |
|
101 |
} |