src/XMLNameCodec.h
branchv_0
changeset 21 053054f9f702
child 22 53f1f3a5649a
equal deleted inserted replaced
20:e67584a06be6 21:053054f9f702
       
     1 /**
       
     2  * Relational pipes
       
     3  * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
       
     4  *
       
     5  * This program is free software: you can redistribute it and/or modify
       
     6  * it under the terms of the GNU General Public License as published by
       
     7  * the Free Software Foundation, version 3 of the License.
       
     8  *
       
     9  * This program is distributed in the hope that it will be useful,
       
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
       
    12  * GNU General Public License for more details.
       
    13  *
       
    14  * You should have received a copy of the GNU General Public License
       
    15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
       
    16  */
       
    17 #pragma once
       
    18 
       
    19 #include <sstream>
       
    20 #include <iomanip>
       
    21 
       
    22 #include <glibmm-2.4/glibmm/ustring.h>
       
    23 
       
    24 namespace relpipe {
       
    25 namespace in {
       
    26 namespace xmltable {
       
    27 
       
    28 class XMLNameCodec {
       
    29 private:
       
    30 	static const char DEFAULT_ESCAPING_CHARACTER = '_';
       
    31 	const char esc;
       
    32 
       
    33 	bool between(gunichar codepoint, gunichar start, gunichar end) {
       
    34 		return codepoint >= start && codepoint <= end;
       
    35 	}
       
    36 
       
    37 	bool isValidNameStartChar(gunichar codepoint) {
       
    38 		// NameStartChar  ::= ":" | [A-Z] | "_" | [a-z] 
       
    39 		//   | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
       
    40 		//   | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
       
    41 		//   | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
       
    42 		return codepoint == ':' || between(codepoint, 'A', 'Z') || codepoint == '_' || between(codepoint, 'a', 'z')
       
    43 				|| between(codepoint, 0xC0, 0xD6) || between(codepoint, 0xD8, 0xF6) || between(codepoint, 0xF8, 0x2FF) || between(codepoint, 0x370, 0x37D) || between(codepoint, 0x37F, 0x1FFF)
       
    44 				|| between(codepoint, 0x200C, 0x200D) || between(codepoint, 0x2070, 0x218F) || between(codepoint, 0x2C00, 0x2FEF) || between(codepoint, 0x3001, 0xD7FF)
       
    45 				|| between(codepoint, 0xF900, 0xFDCF) || between(codepoint, 0xFDF0, 0xFFFD) || between(codepoint, 0x10000, 0xEFFFF);
       
    46 	}
       
    47 
       
    48 	bool isValidNameChar(gunichar codepoint) {
       
    49 		// NameChar       ::= NameStartChar | "-" | "." | [0-9] 
       
    50 		//   | #xB7
       
    51 		//   | [#x0300-#x036F] | [#x203F-#x2040]
       
    52 		return isValidNameStartChar(codepoint) || codepoint == '-' || codepoint == '.' || between(codepoint, '0', '9')
       
    53 				|| codepoint == 0xB7
       
    54 				|| between(codepoint, 0x0300, 0x036F) || between(codepoint, 0x203F, 0x2040);
       
    55 	}
       
    56 
       
    57 public:
       
    58 
       
    59 	XMLNameCodec() : esc(DEFAULT_ESCAPING_CHARACTER) {
       
    60 	}
       
    61 
       
    62 	XMLNameCodec(const char esc) : esc(esc) {
       
    63 	}
       
    64 
       
    65 	virtual ~XMLNameCodec() {
       
    66 	}
       
    67 
       
    68 	Glib::ustring encode(Glib::ustring name) {
       
    69 		if (name.empty()) {
       
    70 			return "_";
       
    71 		} else {
       
    72 			std::stringstream result;
       
    73 
       
    74 			for (int i = 0; i < name.size(); i++) {
       
    75 				gunichar codepoint = name[i];
       
    76 				if (codepoint == esc) {
       
    77 					result.put(esc);
       
    78 					result.put(esc);
       
    79 					continue;
       
    80 				} else if (i == 0) {
       
    81 					if (isValidNameStartChar(codepoint)) {
       
    82 						result << Glib::ustring(1, codepoint);
       
    83 						continue;
       
    84 					} else {
       
    85 						result.put('_');
       
    86 					}
       
    87 				} else if (isValidNameChar(codepoint)) {
       
    88 					result << Glib::ustring(1, codepoint);
       
    89 					continue;
       
    90 				}
       
    91 
       
    92 				result.put(esc);
       
    93 				result << Glib::ustring::format(std::hex, std::setfill(L'0'), std::setw(2), codepoint);
       
    94 				result.put(esc);
       
    95 			}
       
    96 
       
    97 			return result.str();
       
    98 		}
       
    99 	}
       
   100 
       
   101 
       
   102 };
       
   103 
       
   104 }
       
   105 }
       
   106 }