# HG changeset patch # User František Kučera # Date 1591197742 -7200 # Node ID 225e294ad1dbf1d776f3dfeeae49688cf1806b96 # Parent ec9694f3b3432ea302e87e9d4046f759898e961c faulty SQLGetDiagRec() call, unicode error, temporary workaround II diff -r ec9694f3b343 -r 225e294ad1db src/SqlException.cpp --- a/src/SqlException.cpp Tue Jun 02 23:31:55 2020 +0200 +++ b/src/SqlException.cpp Wed Jun 03 17:22:22 2020 +0200 @@ -34,7 +34,7 @@ } SqlException::SqlException(std::wstring message, SQLRETURN resultCode, SQLSMALLINT handleType, SQLHANDLE handle, bool freeHandle) : message(message), resultCode(resultCode) { - std::wstring_convert < std::codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings + std::wstring_convert < std::codecvt_utf8<wchar_t>> convertor("", L"conversion failed"); // TODO: support also other encodings SQLCHAR buffer[SQL_MAX_MESSAGE_LENGTH + 1]; SQLCHAR sqlstate[SQL_SQLSTATE_SIZE + 1]; memset(buffer, 0, sizeof (buffer)); @@ -42,16 +42,30 @@ SQLINTEGER sqlcode; SQLSMALLINT length; for (SQLSMALLINT i = 1; SQLGetDiagRec(handleType, handle, i, sqlstate, &sqlcode, buffer, SQL_MAX_MESSAGE_LENGTH + 1, &length) == SQL_SUCCESS; i++) { - for (size_t i = 0; i < sizeof (buffer); i++) if (buffer[i] > 126 || buffer[i] == '\r') buffer[i] = '?'; // keep only ASCII characters and avoid CR which breaks the output - diagnostics.push_back({convertor.from_bytes((char*) sqlstate), sqlcode, convertor.from_bytes((char*) buffer)}); - // FIXME: decoding fails is error message contains unicode characters – exception is thrown: - // terminate called after throwing an instance of 'std::range_error' - // what(): wstring_convert::from_bytes - // Exception can be avoided by: - // std::wstring_convert < std::codecvt_utf8<wchar_t>> convertor("", L"XXX Unable to decode error message from SQLGetDiagRec()"); - // but actual error message is then lost. - // So as a workaround we keep only ASCII characters. 
- // It seems that we sometimes get valid ISO-8859-2 or ISO-8859-1 encoded messages even if our platform encoding is UTF-8. + std::wstring diagMessage = convertor.from_bytes((char*) buffer); + if (diagMessage == L"conversion failed") { + for (size_t i = 0; i < sizeof (buffer); i++) if (buffer[i] > 126 || buffer[i] == '\r') buffer[i] = '?'; // keep only ASCII characters and avoid CR which breaks the output + diagMessage = convertor.from_bytes((char*) buffer); + } + diagnostics.push_back({convertor.from_bytes((char*) sqlstate), sqlcode, diagMessage}); + // FIXME: character encoding in SQLGetDiagRec() + // + // sometimes we get valid UTF-8 string + // sometimes we get valid ISO-8859-1 string (which is not valid UTF-8) + // + // Temporary workaround: try UTF-8 decoding and if it fails, remove all non-ASCII characters and try again + // + // See also: DriverManager/SQLGetDiagRec.c: + // else if ( !__get_connection( head ) -> unicode_driver + // and DriverManager/__info.c: + // char *asc[] = { "char", "char", "ISO8859-1", "ISO-8859-1", "8859-1", "iso8859_1", "ASCII", NULL }; + // in unixODBC + // + // We get valid UTF-8 if "PostgreSQL Unicode" driver is used and connection is successful: e.g. SELECT * FROM žádná_taková_tabulka_neexistuje; + // We get valid ISO-8859-1 if "PostgreSQL Unicode" driver is used and connection is not successful and LANG=cs_CZ.UTF-8 e.g. [unixODBC]could not connect to server: Spojení odmítnuto + // n.b. character í is present in ISO-8859-1, but other Czech characters (čřš etc.) are not (they are in ISO-8859-2) + // + // SQLGetDiagRec() behavior differs from other functions like SQLGetData(). Maybe use SQLGetDiagRecW() and decode UCS-2 / UTF-16. } if (freeHandle) OdbcCommon::freeHandle(handleType, handle);