# HG changeset patch
# User František Kučera
# Date 1676763407 -3600
# Node ID f67047a1e19e6bf08f5f154fcb2e7c0c9aa915dc
# Parent 1731e8dff446fb845344c7206062efea25c55e34
attribute name escaping: also attributes in the relation header

diff -r 1731e8dff446 -r f67047a1e19e src/RecfileHandler.h
--- a/src/RecfileHandler.h	Sat Feb 18 22:57:22 2023 +0100
+++ b/src/RecfileHandler.h	Sun Feb 19 00:36:47 2023 +0100
@@ -43,7 +43,8 @@
 	wstring_convert> convertor; // XML output will be always in UTF-8
 	std::vector attributeTypes;
 	std::vector attributeTypeCodes;
-	std::vector attributeNames;
+	std::vector attributeNamesIn;
+	std::vector attributeNamesOut;
 	integer_t valueCount = 0;
 	integer_t attributeCount = 0;
 	integer_t relationCount = 0;
@@ -62,10 +63,9 @@
 		}
 	}
 
-	void writeAttributeMetadata(const handlers::AttributeMetadata& attribute) {
-		// FIXME: escaping/filtering
-		std::string recfileType = toRecfileType(attribute.getTypeId());
-		if (recfileType.size()) output << "%type: " << convertor.to_bytes(attribute.getAttributeName()) << " " << recfileType << std::endl;
+	void writeAttributeMetadata(size_t i) {
+		std::string recfileType = toRecfileType(attributeTypes[i]);
+		if (recfileType.size()) output << "%type: " << convertor.to_bytes(attributeNamesOut[i]) << " " << recfileType << std::endl;
 	}
 
 	void writeSeparator() {
@@ -90,7 +90,9 @@
 		}
 	}
 
-	void writeAttribute(const string_t& name, const TypeId& type, const string_t& value) {
+	const string_t escapeAttributeName(const string_t& name) {
+		std::wstringstream escaped;
+
 		// TODO: multiple escapting mode - including one that is not lossless
 		// but allows writing a single '_' inside the name
 		for (size_t i = 0, limit = name.size(); i < limit; i++) {
@@ -99,13 +101,18 @@
 
 			// Not a lossless round-trip
 			// (maybe we could sacrifice some reserved prefix):
-			if (i == 0 && !valid) output << 'x';
+			if (i == 0 && !valid) escaped << 'x';
 
-			if (ch == '_') output << "__";
-			else if (valid) output << convertor.to_bytes(ch);
-			else output << '_' << ((uint32_t) ch) << '_';
+			if (ch == '_') escaped << "__";
+			else if (valid) escaped << ch;
+			else escaped << '_' << ((uint32_t) ch) << '_';
 		}
-		output << ": ";
+
+		return escaped.str();
+	}
+
+	void writeAttribute(const string_t& escapedName, const TypeId& type, const string_t& value) {
+		output << convertor.to_bytes(escapedName) << ": ";
 
 		for (char ch : convertor.to_bytes(value)) {
 			output << ch;
@@ -140,12 +147,14 @@
 		attributeCount = attributes.size();
 		attributeTypes.resize(attributeCount);
 		attributeTypeCodes.resize(attributeCount);
-		attributeNames.resize(attributeCount);
+		attributeNamesIn.resize(attributeCount);
+		attributeNamesOut.resize(attributeCount);
 		for (int i = 0; i < attributes.size(); i++) {
-			attributeNames[i] = attributes[i].getAttributeName();
+			attributeNamesIn[i] = attributes[i].getAttributeName();
+			attributeNamesOut[i] = escapeAttributeName(attributeNamesIn[i]);
 			attributeTypes[i] = attributes[i].getTypeId();
 			attributeTypeCodes[i] = attributes[i].getTypeName();
-			writeAttributeMetadata(attributes[i]);
+			writeAttributeMetadata(i);
 		}
 	}
 
@@ -153,7 +162,7 @@
 		integer_t i = valueCount % attributeCount;
 		if (i == 0) writeSeparator();
 		valueCount++;
-		writeAttribute(attributeNames[i], attributeTypes[i], value);
+		writeAttribute(attributeNamesOut[i], attributeTypes[i], value);
 	}
 
 	void endOfPipe() {