src/XMLNameCodec.h
branchv_0
changeset 23 b25404ff2b2b
parent 22 53f1f3a5649a
equal deleted inserted replaced
22:53f1f3a5649a 23:b25404ff2b2b
    16  */
    16  */
    17 #pragma once
    17 #pragma once
    18 
    18 
    19 #include <sstream>
    19 #include <sstream>
    20 #include <iomanip>
    20 #include <iomanip>
       
    21 #include <stdexcept>
    21 
    22 
    22 #include <glibmm-2.4/glibmm/ustring.h>
    23 #include <glibmm-2.4/glibmm/ustring.h>
    23 
    24 
    24 namespace relpipe {
    25 namespace relpipe {
    25 namespace in {
    26 namespace in {
    27 
    28 
    28 class XMLNameCodec {
    29 class XMLNameCodec {
    29 private:
    30 private:
    30 	static const char DEFAULT_ESCAPING_CHARACTER = '_';
    31 	static const char DEFAULT_ESCAPING_CHARACTER = '_';
    31 	const char esc;
    32 	const char esc;
       
    33 	const bool namespaceAware;
    32 
    34 
    /**
     * Inclusive range test used by the XML name character classifiers.
     *
     * @param codepoint unicode character to test
     * @param start lowest code point of the range (inclusive)
     * @param end highest code point of the range (inclusive)
     * @return true when start <= codepoint <= end
     */
    static bool between(gunichar codepoint, gunichar start, gunichar end) {
        // Reads no instance state, hence static.
        return start <= codepoint && codepoint <= end;
    }
    36 
    38 
    37 	bool isValidNameStartChar(gunichar codepoint, bool namespaceAware = true) {
    39 	/**
       
    40 	 * https://www.w3.org/TR/REC-xml/#NT-NameStartChar
       
    41 	 *
       
    42 	 * @param codepoint unicode character
       
    43 	 * @return whether this character is allowed at the beginning of a XML name
       
    44 	 */
       
    45 	bool isValidNameStartChar(gunichar codepoint) {
    38 		// NameStartChar  ::= ":" | [A-Z] | "_" | [a-z] 
    46 		// NameStartChar  ::= ":" | [A-Z] | "_" | [a-z] 
    39 		//   | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
    47 		//   | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
    40 		//   | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
    48 		//   | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
    41 		//   | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    49 		//   | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    42 		return (codepoint == ':' && !namespaceAware) || between(codepoint, 'A', 'Z') || codepoint == '_' || between(codepoint, 'a', 'z')
    50 		return (codepoint == ':' && !namespaceAware) || between(codepoint, 'A', 'Z') || codepoint == '_' || between(codepoint, 'a', 'z')
    43 				|| between(codepoint, 0xC0, 0xD6) || between(codepoint, 0xD8, 0xF6) || between(codepoint, 0xF8, 0x2FF) || between(codepoint, 0x370, 0x37D) || between(codepoint, 0x37F, 0x1FFF)
    51 				|| between(codepoint, 0xC0, 0xD6) || between(codepoint, 0xD8, 0xF6) || between(codepoint, 0xF8, 0x2FF) || between(codepoint, 0x370, 0x37D) || between(codepoint, 0x37F, 0x1FFF)
    44 				|| between(codepoint, 0x200C, 0x200D) || between(codepoint, 0x2070, 0x218F) || between(codepoint, 0x2C00, 0x2FEF) || between(codepoint, 0x3001, 0xD7FF)
    52 				|| between(codepoint, 0x200C, 0x200D) || between(codepoint, 0x2070, 0x218F) || between(codepoint, 0x2C00, 0x2FEF) || between(codepoint, 0x3001, 0xD7FF)
    45 				|| between(codepoint, 0xF900, 0xFDCF) || between(codepoint, 0xFDF0, 0xFFFD) || between(codepoint, 0x10000, 0xEFFFF);
    53 				|| between(codepoint, 0xF900, 0xFDCF) || between(codepoint, 0xFDF0, 0xFFFD) || between(codepoint, 0x10000, 0xEFFFF);
    46 	}
    54 	}
    47 
    55 
    48 	bool isValidNameChar(gunichar codepoint, bool namespaceAware = true) {
    56 	/**
       
    57 	 * https://www.w3.org/TR/REC-xml/#NT-NameChar
       
    58 	 *
       
    59 	 * @param codepoint unicode character
       
    60 	 * @return whether this character is allowed in a XML name
       
    61 	 */
       
    62 	bool isValidNameChar(gunichar codepoint) {
    49 		// NameChar       ::= NameStartChar | "-" | "." | [0-9] 
    63 		// NameChar       ::= NameStartChar | "-" | "." | [0-9] 
    50 		//   | #xB7
    64 		//   | #xB7
    51 		//   | [#x0300-#x036F] | [#x203F-#x2040]
    65 		//   | [#x0300-#x036F] | [#x203F-#x2040]
    52 		return isValidNameStartChar(codepoint, namespaceAware) || codepoint == '-' || codepoint == '.' || between(codepoint, '0', '9')
    66 		return isValidNameStartChar(codepoint) || codepoint == '-' || codepoint == '.' || between(codepoint, '0', '9')
    53 				|| codepoint == 0xB7
    67 				|| codepoint == 0xB7
    54 				|| between(codepoint, 0x0300, 0x036F) || between(codepoint, 0x203F, 0x2040);
    68 				|| between(codepoint, 0x0300, 0x036F) || between(codepoint, 0x203F, 0x2040);
    55 	}
    69 	}
    56 
    70 
    57 public:
    71 public:
    58 
    72 
    59 	XMLNameCodec() : esc(DEFAULT_ESCAPING_CHARACTER) {
    73 	XMLNameCodec() : XMLNameCodec(DEFAULT_ESCAPING_CHARACTER, true) {
    60 	}
    74 	}
    61 
    75 
    62 	XMLNameCodec(const char esc) : esc(esc) {
    76 	/**
       
    77 	 * @param escapingCharacter must be valid character allowed not only in the middle of the XML name but also as the
       
    78 	 * first character of the name
       
    79 	 * @param namespaceAware colon character is reserved as a separator of the prefix and the local name, see
       
    80 	 * https://www.w3.org/TR/REC-xml-names/#NT-NCName
       
    81 	 * @throws std::invalid_argument if escapingCharacter is not valid
       
    82 	 */
       
    83 	XMLNameCodec(const char escapingCharacter, const bool namespaceAware) : esc(escapingCharacter), namespaceAware(namespaceAware) {
       
    84 		// TODO: allow also characters like #xB7 and add another escaping if they occur at the beginning of the name?
       
    85 		if (!isValidNameStartChar(esc)) {
       
    86 			throw std::invalid_argument("The character „" + std::to_string(escapingCharacter) + "“ is not allowed at the beginning of a XML name and thus not usable for escaping");
       
    87 		}
    63 	}
    88 	}
    64 
    89 
    65 	virtual ~XMLNameCodec() {
    90 	virtual ~XMLNameCodec() {
    66 	}
    91 	}
    67 
    92 
       
    93 	/**
       
    94 	 * @param name any string
       
    95 	 * @return valid name of XML element or attribute
       
    96 	 */
    68 	Glib::ustring encode(Glib::ustring name) {
    97 	Glib::ustring encode(Glib::ustring name) {
    69 		if (name.empty()) {
    98 		if (name.empty()) {
    70 			return "_";
    99 			return Glib::ustring(1, esc);
    71 		} else {
   100 		} else {
    72 			std::stringstream result;
   101 			std::stringstream result;
    73 
   102 
    74 			for (int i = 0; i < name.size(); i++) {
   103 			for (int i = 0; i < name.size(); i++) {
    75 				gunichar codepoint = name[i];
   104 				gunichar codepoint = name[i];
    76 				if (codepoint == esc) {
   105 				if (codepoint == esc) {
    77 					result.put(esc);
   106 					result.put(esc);
    78 					result.put(esc);
   107 					result.put(esc);
    79 					continue;
   108 					continue;
    80 				} else if (i == 0) {
   109 				} else if ((i == 0 && isValidNameStartChar(codepoint)) || (i > 0 && isValidNameChar(codepoint))) {
    81 					if (isValidNameStartChar(codepoint)) {
       
    82 						result << Glib::ustring(1, codepoint);
       
    83 						continue;
       
    84 					} else {
       
    85 						result.put('_');
       
    86 					}
       
    87 				} else if (isValidNameChar(codepoint)) {
       
    88 					result << Glib::ustring(1, codepoint);
   110 					result << Glib::ustring(1, codepoint);
    89 					continue;
   111 					continue;
    90 				}
   112 				}
    91 
   113 
    92 				result.put(esc);
   114 				result.put(esc);
    96 
   118 
    97 			return result.str();
   119 			return result.str();
    98 		}
   120 		}
    99 	}
   121 	}
   100 
   122 
   101 
       
   102 };
   123 };
   103 
   124 
   104 }
   125 }
   105 }
   126 }
   106 }
   127 }