|
/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
17 #pragma once |
|
18 |
|
19 #include <sstream> |
|
20 #include <iomanip> |
|
21 |
|
22 #include <glibmm-2.4/glibmm/ustring.h> |
|
23 |
|
24 namespace relpipe { |
|
25 namespace in { |
|
26 namespace xmltable { |
|
27 |
|
28 class XMLNameCodec { |
|
29 private: |
|
30 static const char DEFAULT_ESCAPING_CHARACTER = '_'; |
|
31 const char esc; |
|
32 |
|
33 bool between(gunichar codepoint, gunichar start, gunichar end) { |
|
34 return codepoint >= start && codepoint <= end; |
|
35 } |
|
36 |
|
37 bool isValidNameStartChar(gunichar codepoint) { |
|
38 // NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
|
39 // | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
|
40 // | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
|
41 // | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] |
|
42 return codepoint == ':' || between(codepoint, 'A', 'Z') || codepoint == '_' || between(codepoint, 'a', 'z') |
|
43 || between(codepoint, 0xC0, 0xD6) || between(codepoint, 0xD8, 0xF6) || between(codepoint, 0xF8, 0x2FF) || between(codepoint, 0x370, 0x37D) || between(codepoint, 0x37F, 0x1FFF) |
|
44 || between(codepoint, 0x200C, 0x200D) || between(codepoint, 0x2070, 0x218F) || between(codepoint, 0x2C00, 0x2FEF) || between(codepoint, 0x3001, 0xD7FF) |
|
45 || between(codepoint, 0xF900, 0xFDCF) || between(codepoint, 0xFDF0, 0xFFFD) || between(codepoint, 0x10000, 0xEFFFF); |
|
46 } |
|
47 |
|
48 bool isValidNameChar(gunichar codepoint) { |
|
49 // NameChar ::= NameStartChar | "-" | "." | [0-9] |
|
50 // | #xB7 |
|
51 // | [#x0300-#x036F] | [#x203F-#x2040] |
|
52 return isValidNameStartChar(codepoint) || codepoint == '-' || codepoint == '.' || between(codepoint, '0', '9') |
|
53 || codepoint == 0xB7 |
|
54 || between(codepoint, 0x0300, 0x036F) || between(codepoint, 0x203F, 0x2040); |
|
55 } |
|
56 |
|
57 public: |
|
58 |
|
59 XMLNameCodec() : esc(DEFAULT_ESCAPING_CHARACTER) { |
|
60 } |
|
61 |
|
62 XMLNameCodec(const char esc) : esc(esc) { |
|
63 } |
|
64 |
|
65 virtual ~XMLNameCodec() { |
|
66 } |
|
67 |
|
68 Glib::ustring encode(Glib::ustring name) { |
|
69 if (name.empty()) { |
|
70 return "_"; |
|
71 } else { |
|
72 std::stringstream result; |
|
73 |
|
74 for (int i = 0; i < name.size(); i++) { |
|
75 gunichar codepoint = name[i]; |
|
76 if (codepoint == esc) { |
|
77 result.put(esc); |
|
78 result.put(esc); |
|
79 continue; |
|
80 } else if (i == 0) { |
|
81 if (isValidNameStartChar(codepoint)) { |
|
82 result << Glib::ustring(1, codepoint); |
|
83 continue; |
|
84 } else { |
|
85 result.put('_'); |
|
86 } |
|
87 } else if (isValidNameChar(codepoint)) { |
|
88 result << Glib::ustring(1, codepoint); |
|
89 continue; |
|
90 } |
|
91 |
|
92 result.put(esc); |
|
93 result << Glib::ustring::format(std::hex, std::setfill(L'0'), std::setw(2), codepoint); |
|
94 result.put(esc); |
|
95 } |
|
96 |
|
97 return result.str(); |
|
98 } |
|
99 } |
|
100 |
|
101 |
|
102 }; |
|
103 |
|
104 } |
|
105 } |
|
106 } |