27 |
28 |
28 class XMLNameCodec { |
29 class XMLNameCodec { |
29 private: |
30 private: |
30 static const char DEFAULT_ESCAPING_CHARACTER = '_'; |
31 static const char DEFAULT_ESCAPING_CHARACTER = '_'; |
31 const char esc; |
32 const char esc; |
|
33 const bool namespaceAware; |
32 |
34 |
33 bool between(gunichar codepoint, gunichar start, gunichar end) { |
35 bool between(gunichar codepoint, gunichar start, gunichar end) { |
34 return codepoint >= start && codepoint <= end; |
36 return codepoint >= start && codepoint <= end; |
35 } |
37 } |
36 |
38 |
37 bool isValidNameStartChar(gunichar codepoint, bool namespaceAware = true) { |
39 /** |
|
40 * https://www.w3.org/TR/REC-xml/#NT-NameStartChar |
|
41 * |
|
42 * @param codepoint unicode character |
|
43 * @return whether this character is allowed at the beginning of a XML name |
|
44 */ |
|
45 bool isValidNameStartChar(gunichar codepoint) { |
38 // NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
46 // NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
39 // | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
47 // | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
40 // | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
48 // | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
41 // | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] |
49 // | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] |
42 return (codepoint == ':' && !namespaceAware) || between(codepoint, 'A', 'Z') || codepoint == '_' || between(codepoint, 'a', 'z') |
50 return (codepoint == ':' && !namespaceAware) || between(codepoint, 'A', 'Z') || codepoint == '_' || between(codepoint, 'a', 'z') |
43 || between(codepoint, 0xC0, 0xD6) || between(codepoint, 0xD8, 0xF6) || between(codepoint, 0xF8, 0x2FF) || between(codepoint, 0x370, 0x37D) || between(codepoint, 0x37F, 0x1FFF) |
51 || between(codepoint, 0xC0, 0xD6) || between(codepoint, 0xD8, 0xF6) || between(codepoint, 0xF8, 0x2FF) || between(codepoint, 0x370, 0x37D) || between(codepoint, 0x37F, 0x1FFF) |
44 || between(codepoint, 0x200C, 0x200D) || between(codepoint, 0x2070, 0x218F) || between(codepoint, 0x2C00, 0x2FEF) || between(codepoint, 0x3001, 0xD7FF) |
52 || between(codepoint, 0x200C, 0x200D) || between(codepoint, 0x2070, 0x218F) || between(codepoint, 0x2C00, 0x2FEF) || between(codepoint, 0x3001, 0xD7FF) |
45 || between(codepoint, 0xF900, 0xFDCF) || between(codepoint, 0xFDF0, 0xFFFD) || between(codepoint, 0x10000, 0xEFFFF); |
53 || between(codepoint, 0xF900, 0xFDCF) || between(codepoint, 0xFDF0, 0xFFFD) || between(codepoint, 0x10000, 0xEFFFF); |
46 } |
54 } |
47 |
55 |
48 bool isValidNameChar(gunichar codepoint, bool namespaceAware = true) { |
56 /** |
|
57 * https://www.w3.org/TR/REC-xml/#NT-NameChar |
|
58 * |
|
59 * @param codepoint unicode character |
|
60 * @return whether this character is allowed in a XML name |
|
61 */ |
|
62 bool isValidNameChar(gunichar codepoint) { |
49 // NameChar ::= NameStartChar | "-" | "." | [0-9] |
63 // NameChar ::= NameStartChar | "-" | "." | [0-9] |
50 // | #xB7 |
64 // | #xB7 |
51 // | [#x0300-#x036F] | [#x203F-#x2040] |
65 // | [#x0300-#x036F] | [#x203F-#x2040] |
52 return isValidNameStartChar(codepoint, namespaceAware) || codepoint == '-' || codepoint == '.' || between(codepoint, '0', '9') |
66 return isValidNameStartChar(codepoint) || codepoint == '-' || codepoint == '.' || between(codepoint, '0', '9') |
53 || codepoint == 0xB7 |
67 || codepoint == 0xB7 |
54 || between(codepoint, 0x0300, 0x036F) || between(codepoint, 0x203F, 0x2040); |
68 || between(codepoint, 0x0300, 0x036F) || between(codepoint, 0x203F, 0x2040); |
55 } |
69 } |
56 |
70 |
57 public: |
71 public: |
58 |
72 |
59 XMLNameCodec() : esc(DEFAULT_ESCAPING_CHARACTER) { |
73 XMLNameCodec() : XMLNameCodec(DEFAULT_ESCAPING_CHARACTER, true) { |
60 } |
74 } |
61 |
75 |
62 XMLNameCodec(const char esc) : esc(esc) { |
76 /** |
|
77 * @param escapingCharacter must be valid character allowed not only in the middle of the XML name but also as the |
|
78 * first character of the name |
|
79 * @param namespaceAware colon character is reserved as a separator of the prefix and the local name, see |
|
80 * https://www.w3.org/TR/REC-xml-names/#NT-NCName |
|
81 * @throws std::invalid_argument if escapingCharacter is not valid |
|
82 */ |
|
83 XMLNameCodec(const char escapingCharacter, const bool namespaceAware) : esc(escapingCharacter), namespaceAware(namespaceAware) { |
|
84 // TODO: allow also characters like #xB7 and add another escaping if they occur at the beginning of the name? |
|
85 if (!isValidNameStartChar(esc)) { |
|
86 throw std::invalid_argument("The character „" + std::to_string(escapingCharacter) + "“ is not allowed at the beginning of a XML name and thus not usable for escaping"); |
|
87 } |
63 } |
88 } |
64 |
89 |
65 virtual ~XMLNameCodec() { |
90 virtual ~XMLNameCodec() { |
66 } |
91 } |
67 |
92 |
|
93 /** |
|
94 * @param name any string |
|
95 * @return valid name of XML element or attribute |
|
96 */ |
68 Glib::ustring encode(Glib::ustring name) { |
97 Glib::ustring encode(Glib::ustring name) { |
69 if (name.empty()) { |
98 if (name.empty()) { |
70 return "_"; |
99 return Glib::ustring(1, esc); |
71 } else { |
100 } else { |
72 std::stringstream result; |
101 std::stringstream result; |
73 |
102 |
74 for (int i = 0; i < name.size(); i++) { |
103 for (int i = 0; i < name.size(); i++) { |
75 gunichar codepoint = name[i]; |
104 gunichar codepoint = name[i]; |
76 if (codepoint == esc) { |
105 if (codepoint == esc) { |
77 result.put(esc); |
106 result.put(esc); |
78 result.put(esc); |
107 result.put(esc); |
79 continue; |
108 continue; |
80 } else if (i == 0) { |
109 } else if ((i == 0 && isValidNameStartChar(codepoint)) || (i > 0 && isValidNameChar(codepoint))) { |
81 if (isValidNameStartChar(codepoint)) { |
|
82 result << Glib::ustring(1, codepoint); |
|
83 continue; |
|
84 } else { |
|
85 result.put('_'); |
|
86 } |
|
87 } else if (isValidNameChar(codepoint)) { |
|
88 result << Glib::ustring(1, codepoint); |
110 result << Glib::ustring(1, codepoint); |
89 continue; |
111 continue; |
90 } |
112 } |
91 |
113 |
92 result.put(esc); |
114 result.put(esc); |