changeset 45 225e294ad1db
parent 44 ec9694f3b343
--- a/src/SqlException.cpp	Tue Jun 02 23:31:55 2020 +0200
+++ b/src/SqlException.cpp	Wed Jun 03 17:22:22 2020 +0200
@@ -34,7 +34,7 @@
 SqlException::SqlException(std::wstring message, SQLRETURN resultCode, SQLSMALLINT handleType, SQLHANDLE handle, bool freeHandle) : message(message), resultCode(resultCode) {
-	std::wstring_convert < std::codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings
+	std::wstring_convert < std::codecvt_utf8<wchar_t>> convertor("", L"conversion failed"); // TODO: support also other encodings
 	memset(buffer, 0, sizeof (buffer));
@@ -42,16 +42,30 @@
 	SQLINTEGER sqlcode;
 	for (SQLSMALLINT i = 1; SQLGetDiagRec(handleType, handle, i, sqlstate, &sqlcode, buffer, SQL_MAX_MESSAGE_LENGTH + 1, &length) == SQL_SUCCESS; i++) {
-		for (size_t i = 0; i < sizeof (buffer); i++) if (buffer[i] > 126 || buffer[i] == '\r') buffer[i] = '?'; // keep only ASCII characters and avoid CR which breaks the output
-		diagnostics.push_back({convertor.from_bytes((char*) sqlstate), sqlcode, convertor.from_bytes((char*) buffer)});
-		// FIXME: decoding fails is error message contains unicode characters – exception is thrown:
-		// terminate called after throwing an instance of 'std::range_error'
-		//   what():  wstring_convert::from_bytes
-		// Exception can be avoided by:
-		//   std::wstring_convert < std::codecvt_utf8<wchar_t>> convertor("", L"XXX Unable to decode error message from SQLGetDiagRec()");
-		// but actual error message is then lost.
-		// So as a workaround we keep only ASCII characters.
-		// It seems that we sometimes get valid ISO-8859-2 or ISO-8859-1 encoded messages even if our platform encoding is UTF-8.
+		std::wstring diagMessage = convertor.from_bytes((char*) buffer);
+		if (diagMessage == L"conversion failed") {
+			for (size_t i = 0; i < sizeof (buffer); i++) if (buffer[i] > 126 || buffer[i] == '\r') buffer[i] = '?'; // keep only ASCII characters and avoid CR which breaks the output
+			diagMessage = convertor.from_bytes((char*) buffer);
+		}
+		diagnostics.push_back({convertor.from_bytes((char*) sqlstate), sqlcode, diagMessage});
+		// FIXME: character encoding in SQLGetDiagRec()
+		//
+		// sometimes we get a valid UTF-8 string
+		// sometimes we get a valid ISO-8859-1 string (which is not valid UTF-8)
+		//
+		// Temporary workaround: try UTF-8 decoding and, if it fails, replace all non-ASCII characters (and CR) with '?' and try again
+		//
+		// See also: DriverManager/SQLGetDiagRec.c:
+		//   else if ( !__get_connection( head ) -> unicode_driver
+		// and DriverManager/__info.c:
+		//   char *asc[] = { "char", "char", "ISO8859-1", "ISO-8859-1", "8859-1", "iso8859_1", "ASCII", NULL };
+		// in unixODBC
+		//
+		// We get valid UTF-8 if the "PostgreSQL Unicode" driver is used and the connection is successful, e.g. SELECT * FROM žádná_taková_tabulka_neexistuje;
+		// We get valid ISO-8859-1 if the "PostgreSQL Unicode" driver is used, the connection is not successful and LANG=cs_CZ.UTF-8, e.g. [unixODBC]could not connect to server: Spojení odmítnuto
+		// N.B. the character í is present in ISO-8859-1, but other Czech characters (čřš etc.) are not (they are in ISO-8859-2)
+		//
+		// SQLGetDiagRec() behavior differs from other functions like SQLGetData(). Maybe use SQLGetDiagRecW() and decode UCS-2 / UTF-16.
 	if (freeHandle) OdbcCommon::freeHandle(handleType, handle);