attribute name escaping: first version v_0
author František Kučera <franta-hg@frantovo.cz>
Sat, 18 Feb 2023 22:57:22 +0100
branch v_0
changeset 16 1731e8dff446
parent 15 e5421eea0583
child 17 f67047a1e19e
attribute name escaping: first version
src/RecfileHandler.h
--- a/src/RecfileHandler.h	Sat Sep 10 19:27:47 2022 +0200
+++ b/src/RecfileHandler.h	Sat Feb 18 22:57:22 2023 +0100
@@ -72,9 +72,40 @@
 		output << std::endl;
 	}
 
+	bool between(wchar_t ch, wchar_t start, wchar_t end) { // true when ch lies in the inclusive range [start, end]
+		return ch >= start && ch <= end;
+	}
+
+	bool isValidNameCharacter(wchar_t ch, bool first) { // reports whether ch is in the accepted name-character set; 'first' selects the stricter set for the leading character
+		if (first) { // leading character: only letters a-z / A-Z are accepted
+			// also '%' is technically valid here, but it is used for special
+			// purposes like the relation name or attribute types
+			return /**/between(ch, L'a', L'z')
+					|| between(ch, L'A', L'Z');
+		} else { // later characters: underscore, letters a-z / A-Z and digits 0-9 are accepted
+			return ch == L'_'
+					|| between(ch, L'a', L'z')
+					|| between(ch, L'A', L'Z')
+					|| between(ch, L'0', L'9');
+		}
+	}
+
 	void writeAttribute(const string_t& name, const TypeId& type, const string_t& value) {
-		// FIXME: escaping/filtering
-		output << convertor.to_bytes(name) << ": ";
+		// TODO: multiple escapting mode - including one that is not lossless
+		// but allows writing a single '_' inside the name
+		for (size_t i = 0, limit = name.size(); i < limit; i++) {
+			wchar_t ch = name[i];
+			bool valid = isValidNameCharacter(ch, i == 0);
+
+			// Not a lossless round-trip
+			// (maybe we could sacrifice some reserved prefix):
+			if (i == 0 && !valid) output << 'x';
+
+			if (ch == '_') output << "__";
+			else if (valid) output << convertor.to_bytes(ch);
+			else output << '_' << ((uint32_t) ch) << '_';
+		}
+		output << ": ";
 
 		for (char ch : convertor.to_bytes(value)) {
 			output << ch;