# HG changeset patch # User František Kučera # Date 1676757442 -3600 # Node ID 1731e8dff446fb845344c7206062efea25c55e34 # Parent e5421eea05836ca5edc3d6fd5de8e2145a3e91bf attribute name escaping: first version diff -r e5421eea0583 -r 1731e8dff446 src/RecfileHandler.h --- a/src/RecfileHandler.h Sat Sep 10 19:27:47 2022 +0200 +++ b/src/RecfileHandler.h Sat Feb 18 22:57:22 2023 +0100 @@ -72,9 +72,40 @@ output << std::endl; } + /** Returns true when ch lies within the inclusive range [start, end]. */ bool between(wchar_t ch, wchar_t start, wchar_t end) { + return ch >= start && ch <= end; + } + + /** Checks whether ch is accepted as a name character: the first character must be an ASCII letter (a-z, A-Z); subsequent characters may also be '_' or a digit (0-9). */ bool isValidNameCharacter(wchar_t ch, bool first) { + if (first) { + // also '%' is technically valid here, but it is used for special + // purposes like the relation name or attribute types + return /**/between(ch, L'a', L'z') + || between(ch, L'A', L'Z'); + } else { + return ch == L'_' + || between(ch, L'a', L'z') + || between(ch, L'A', L'Z') + || between(ch, L'0', L'9'); + } + } + void writeAttribute(const string_t& name, const TypeId& type, const string_t& value) { - // FIXME: escaping/filtering - output << convertor.to_bytes(name) << ": "; + // TODO: multiple escaping modes - including one that is not lossless + // but allows writing a single '_' inside the name + for (size_t i = 0, limit = name.size(); i < limit; i++) { + wchar_t ch = name[i]; + bool valid = isValidNameCharacter(ch, i == 0); + + // Not a lossless round-trip + // (maybe we could sacrifice some reserved prefix): + if (i == 0 && !valid) output << 'x'; + + if (ch == '_') output << "__"; + else if (valid) output << convertor.to_bytes(ch); + else output << '_' << ((uint32_t) ch) << '_'; + } + output << ": "; for (char ch : convertor.to_bytes(value)) { output << ch;