attribute name escaping: also attributes in the relation header v_0
author František Kučera <franta-hg@frantovo.cz>
Sun, 19 Feb 2023 00:36:47 +0100
branch v_0
changeset 17 f67047a1e19e
parent 16 1731e8dff446
child 18 002077ecb17a
attribute name escaping: also attributes in the relation header
src/RecfileHandler.h
--- a/src/RecfileHandler.h	Sat Feb 18 22:57:22 2023 +0100
+++ b/src/RecfileHandler.h	Sun Feb 19 00:36:47 2023 +0100
@@ -43,7 +43,8 @@
 	wstring_convert<codecvt_utf8<wchar_t>> convertor; // XML output will be always in UTF-8
 	std::vector<TypeId> attributeTypes;
 	std::vector<string_t> attributeTypeCodes;
-	std::vector<string_t> attributeNames;
+	std::vector<string_t> attributeNamesIn;
+	std::vector<string_t> attributeNamesOut;
 	integer_t valueCount = 0;
 	integer_t attributeCount = 0;
 	integer_t relationCount = 0;
@@ -62,10 +63,9 @@
 		}
 	}
 
-	void writeAttributeMetadata(const handlers::AttributeMetadata& attribute) {
-		// FIXME: escaping/filtering
-		std::string recfileType = toRecfileType(attribute.getTypeId());
-		if (recfileType.size()) output << "%type: " << convertor.to_bytes(attribute.getAttributeName()) << " " << recfileType << std::endl;
+	void writeAttributeMetadata(size_t i) {
+		std::string recfileType = toRecfileType(attributeTypes[i]);
+		if (recfileType.size()) output << "%type: " << convertor.to_bytes(attributeNamesOut[i]) << " " << recfileType << std::endl;
 	}
 
 	void writeSeparator() {
@@ -90,7 +90,9 @@
 		}
 	}
 
-	void writeAttribute(const string_t& name, const TypeId& type, const string_t& value) {
+	const string_t escapeAttributeName(const string_t& name) {
+		std::wstringstream escaped;
+
 		// TODO: multiple escapting mode - including one that is not lossless
 		// but allows writing a single '_' inside the name
 		for (size_t i = 0, limit = name.size(); i < limit; i++) {
@@ -99,13 +101,18 @@
 
 			// Not a lossless round-trip
 			// (maybe we could sacrifice some reserved prefix):
-			if (i == 0 && !valid) output << 'x';
+			if (i == 0 && !valid) escaped << 'x';
 
-			if (ch == '_') output << "__";
-			else if (valid) output << convertor.to_bytes(ch);
-			else output << '_' << ((uint32_t) ch) << '_';
+			if (ch == '_') escaped << "__";
+			else if (valid) escaped << ch;
+			else escaped << '_' << ((uint32_t) ch) << '_';
 		}
-		output << ": ";
+
+		return escaped.str();
+	}
+
+	void writeAttribute(const string_t& escapedName, const TypeId& type, const string_t& value) {
+		output << convertor.to_bytes(escapedName) << ": ";
 
 		for (char ch : convertor.to_bytes(value)) {
 			output << ch;
@@ -140,12 +147,14 @@
 		attributeCount = attributes.size();
 		attributeTypes.resize(attributeCount);
 		attributeTypeCodes.resize(attributeCount);
-		attributeNames.resize(attributeCount);
+		attributeNamesIn.resize(attributeCount);
+		attributeNamesOut.resize(attributeCount);
 		for (int i = 0; i < attributes.size(); i++) {
-			attributeNames[i] = attributes[i].getAttributeName();
+			attributeNamesIn[i] = attributes[i].getAttributeName();
+			attributeNamesOut[i] = escapeAttributeName(attributeNamesIn[i]);
 			attributeTypes[i] = attributes[i].getTypeId();
 			attributeTypeCodes[i] = attributes[i].getTypeName();
-			writeAttributeMetadata(attributes[i]);
+			writeAttributeMetadata(i);
 		}
 	}
 
@@ -153,7 +162,7 @@
 		integer_t i = valueCount % attributeCount;
 		if (i == 0) writeSeparator();
 		valueCount++;
-		writeAttribute(attributeNames[i], attributeTypes[i], value);
+		writeAttribute(attributeNamesOut[i], attributeTypes[i], value);
 	}
 
 	void endOfPipe() {