/**
* Relational pipes
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <glibmm-2.4/glibmm/ustring.h>
namespace relpipe {
namespace in {
namespace xmltable {
/**
 * Encodes arbitrary strings so that they can be used as names of XML elements or attributes.
 *
 * Characters that are not allowed at their position in an XML name are replaced by their
 * hexadecimal code point wrapped in the escaping character (e.g. a space becomes „_20_“
 * with the default escaping character). The escaping character itself is doubled.
 */
class XMLNameCodec {
private:
	static const char DEFAULT_ESCAPING_CHARACTER = '_';
	const char esc;
	const bool namespaceAware;

	/**
	 * @param codepoint unicode character
	 * @param start first character of the range (inclusive)
	 * @param end last character of the range (inclusive)
	 * @return whether the codepoint lies in the closed interval [start, end]
	 */
	static bool between(gunichar codepoint, gunichar start, gunichar end) {
		return codepoint >= start && codepoint <= end;
	}

	/**
	 * https://www.w3.org/TR/REC-xml/#NT-NameStartChar
	 *
	 * In the namespace-aware mode the colon is rejected, because it is reserved
	 * as the separator of the prefix and the local name.
	 *
	 * @param codepoint unicode character
	 * @return whether this character is allowed at the beginning of a XML name
	 */
	bool isValidNameStartChar(gunichar codepoint) const {
		// NameStartChar ::= ":" | [A-Z] | "_" | [a-z]
		//                 | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
		//                 | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
		//                 | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
		return (codepoint == ':' && !namespaceAware) || between(codepoint, 'A', 'Z') || codepoint == '_' || between(codepoint, 'a', 'z')
				|| between(codepoint, 0xC0, 0xD6) || between(codepoint, 0xD8, 0xF6) || between(codepoint, 0xF8, 0x2FF) || between(codepoint, 0x370, 0x37D) || between(codepoint, 0x37F, 0x1FFF)
				|| between(codepoint, 0x200C, 0x200D) || between(codepoint, 0x2070, 0x218F) || between(codepoint, 0x2C00, 0x2FEF) || between(codepoint, 0x3001, 0xD7FF)
				|| between(codepoint, 0xF900, 0xFDCF) || between(codepoint, 0xFDF0, 0xFFFD) || between(codepoint, 0x10000, 0xEFFFF);
	}

	/**
	 * https://www.w3.org/TR/REC-xml/#NT-NameChar
	 *
	 * @param codepoint unicode character
	 * @return whether this character is allowed in a XML name
	 */
	bool isValidNameChar(gunichar codepoint) const {
		// NameChar ::= NameStartChar | "-" | "." | [0-9]
		//            | #xB7
		//            | [#x0300-#x036F] | [#x203F-#x2040]
		return isValidNameStartChar(codepoint) || codepoint == '-' || codepoint == '.' || between(codepoint, '0', '9')
				|| codepoint == 0xB7
				|| between(codepoint, 0x0300, 0x036F) || between(codepoint, 0x203F, 0x2040);
	}

public:

	/**
	 * Creates a namespace-aware codec that uses the default escaping character („_“).
	 */
	XMLNameCodec() : XMLNameCodec(DEFAULT_ESCAPING_CHARACTER, true) {
	}

	/**
	 * @param escapingCharacter must be valid character allowed not only in the middle of the XML name but also as the
	 * first character of the name
	 * @param namespaceAware colon character is reserved as a separator of the prefix and the local name, see
	 * https://www.w3.org/TR/REC-xml-names/#NT-NCName
	 * @throws std::invalid_argument if escapingCharacter is not valid
	 */
	XMLNameCodec(const char escapingCharacter, const bool namespaceAware) : esc(escapingCharacter), namespaceAware(namespaceAware) {
		// TODO: allow also characters like #xB7 and add another escaping if they occur at the beginning of the name?
		if (!isValidNameStartChar(esc)) {
			// std::to_string(char) would print the numeric code (e.g. „32“ for a space), so printable
			// characters are shown as themselves; the numeric form is kept only for non-printable bytes.
			const bool printable = escapingCharacter >= 0x20 && escapingCharacter <= 0x7E;
			const std::string shown = printable ? std::string(1, escapingCharacter) : std::to_string(escapingCharacter);
			throw std::invalid_argument("The character „" + shown + "“ is not allowed at the beginning of a XML name and thus not usable for escaping");
		}
	}

	virtual ~XMLNameCodec() {
	}

	/**
	 * Converts any string to a valid XML name:
	 *  - an empty string becomes a single escaping character,
	 *  - the escaping character is doubled,
	 *  - characters valid at their position are copied unchanged,
	 *  - any other character is written as its hexadecimal code point (at least two digits)
	 *    enclosed between two escaping characters.
	 *
	 * @param name any string
	 * @return valid name of XML element or attribute
	 */
	Glib::ustring encode(const Glib::ustring& name) const {
		// Work with the escaping character as a code point, so it is always appended as valid UTF-8.
		const gunichar escape = static_cast<unsigned char>(esc);

		if (name.empty()) {
			return Glib::ustring(1, escape);
		}

		// The result is built directly as a Glib::ustring: streaming a Glib::ustring into a std::ostream
		// converts it to the encoding of the current locale, which would corrupt (or reject) non-ASCII names.
		Glib::ustring result;
		bool first = true;

		// Iterating over the characters avoids repeated lookups by character offset.
		for (const gunichar codepoint : name) {
			if (codepoint == escape) {
				result.append(2, escape);
			} else if (first ? isValidNameStartChar(codepoint) : isValidNameChar(codepoint)) {
				result.push_back(codepoint);
			} else {
				result.push_back(escape);
				result += Glib::ustring::format(std::hex, std::setfill(L'0'), std::setw(2), codepoint);
				result.push_back(escape);
			}
			first = false;
		}

		return result;
	}
};
}
}
}