8164612: NoSuchMethodException when method name contains NULL or Latin-1 supplement character
Summary: String length needs to be updated when converting from unicode to utf8.
Reviewed-by: kvn, coleenp
--- a/hotspot/src/share/vm/classfile/stringTable.cpp Mon May 23 19:46:43 2016 +0200
+++ b/hotspot/src/share/vm/classfile/stringTable.cpp Wed Oct 26 14:36:05 2016 +0200
@@ -437,17 +437,15 @@
st->print("%d: ", length);
} else {
ResourceMark rm(THREAD);
- int utf8_length;
+ int utf8_length = length;
char* utf8_string;
if (!is_latin1) {
jchar* chars = value->char_at_addr(0);
- utf8_length = UNICODE::utf8_length(chars, length);
- utf8_string = UNICODE::as_utf8(chars, length);
+ utf8_string = UNICODE::as_utf8(chars, utf8_length);
} else {
jbyte* bytes = value->byte_at_addr(0);
- utf8_length = UNICODE::utf8_length(bytes, length);
- utf8_string = UNICODE::as_utf8(bytes, length);
+ utf8_string = UNICODE::as_utf8(bytes, utf8_length);
}
st->print("%d: ", utf8_length);
--- a/hotspot/src/share/vm/prims/jvmtiEnv.cpp Mon May 23 19:46:43 2016 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiEnv.cpp Wed Oct 26 14:36:05 2016 +0200
@@ -1001,7 +1001,8 @@
if (name() != NULL) {
n = java_lang_String::as_utf8_string(name());
} else {
- n = UNICODE::as_utf8((jchar*) NULL, 0);
+ int utf8_length = 0;
+ n = UNICODE::as_utf8((jchar*) NULL, utf8_length);
}
info_ptr->name = (char *) jvmtiMalloc(strlen(n)+1);
--- a/hotspot/src/share/vm/utilities/utf8.cpp Mon May 23 19:46:43 2016 +0200
+++ b/hotspot/src/share/vm/utilities/utf8.cpp Wed Oct 26 14:36:05 2016 +0200
@@ -411,61 +411,46 @@
}
int UNICODE::utf8_size(jchar c) {
- if ((0x0001 <= c) && (c <= 0x007F)) return 1;
- if (c <= 0x07FF) return 2;
- return 3;
+ if ((0x0001 <= c) && (c <= 0x007F)) {
+ // ASCII character
+ return 1;
+ } else if (c <= 0x07FF) {
+ return 2;
+ } else {
+ return 3;
+ }
}
int UNICODE::utf8_size(jbyte c) {
- if (c >= 0x0001) return 1;
- return 2;
+ if (c >= 0x01) {
+ // ASCII character. Check is equivalent to
+ // (0x01 <= c) && (c <= 0x7F) because c is signed.
+ return 1;
+ } else {
+ // Non-ASCII character or 0x00 which needs to be
+ // two-byte encoded as 0xC080 in modified UTF-8.
+ return 2;
+ }
}
-int UNICODE::utf8_length(jchar* base, int length) {
+template<typename T>
+int UNICODE::utf8_length(T* base, int length) {
int result = 0;
for (int index = 0; index < length; index++) {
- jchar c = base[index];
- if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
- else if (c <= 0x07FF) result += 2;
- else result += 3;
- }
- return result;
-}
-
-int UNICODE::utf8_length(jbyte* base, int length) {
- int result = 0;
- for (int index = 0; index < length; index++) {
- jbyte c = base[index];
+ T c = base[index];
result += utf8_size(c);
}
return result;
}
-char* UNICODE::as_utf8(jchar* base, int length) {
+template<typename T>
+char* UNICODE::as_utf8(T* base, int& length) {
int utf8_len = utf8_length(base, length);
u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
assert((int) strlen(result) == utf8_len, "length prediction must be correct");
- return result;
-}
-
-char* UNICODE::as_utf8(jbyte* base, int length) {
- int utf8_len = utf8_length(base, length);
- u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
- u_char* p = result;
- if (utf8_len == length) {
- for (int index = 0; index < length; index++) {
- *p++ = base[index];
- }
- } else {
- // Unicode string contains U+0000 which should
- // be encoded as 0xC080 in "modified" UTF8.
- for (int index = 0; index < length; index++) {
- p = utf8_write(p, ((jchar) base[index]) & 0xff);
- }
- }
- *p = '\0';
- assert(p == &result[utf8_len], "length prediction must be correct");
+ // Set string length to utf8 length
+ length = utf8_len;
return (char*) result;
}
@@ -490,9 +475,10 @@
buflen -= sz;
if (buflen <= 0) break; // string is truncated
if (sz == 1) {
+ // Copy ASCII characters (UTF-8 is ASCII compatible)
*p++ = c;
} else {
- // Unicode string contains U+0000 which should
+ // Non-ASCII character (two-byte encoded), or 0x00 which should
// be encoded as 0xC080 in "modified" UTF8.
p = utf8_write(p, ((jchar) c) & 0xff);
}
@@ -543,6 +529,10 @@
}
// Explicit instantiation for all supported types.
+template int UNICODE::utf8_length(jbyte* base, int length);
+template int UNICODE::utf8_length(jchar* base, int length);
+template char* UNICODE::as_utf8(jbyte* base, int& length);
+template char* UNICODE::as_utf8(jchar* base, int& length);
template int UNICODE::quoted_ascii_length<jbyte>(jbyte* base, int length);
template int UNICODE::quoted_ascii_length<jchar>(jchar* base, int length);
template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, int buflen);
--- a/hotspot/src/share/vm/utilities/utf8.hpp Mon May 23 19:46:43 2016 +0200
+++ b/hotspot/src/share/vm/utilities/utf8.hpp Wed Oct 26 14:36:05 2016 +0200
@@ -97,16 +97,15 @@
static int utf8_size(jbyte c);
// returns the utf8 length of a unicode string
- static int utf8_length(jchar* base, int length);
- static int utf8_length(jbyte* base, int length);
+ template<typename T> static int utf8_length(T* base, int length);
// converts a unicode string to utf8 string
static void convert_to_utf8(const jchar* base, int length, char* utf8_buffer);
// converts a unicode string to a utf8 string; result is allocated
- // in resource area unless a buffer is provided.
- static char* as_utf8(jchar* base, int length);
- static char* as_utf8(jbyte* base, int length);
+ // in resource area unless a buffer is provided. For the overload without a
+ // buffer, 'length' is in/out: unicode length in, utf8 result length out.
+ template<typename T> static char* as_utf8(T* base, int& length);
static char* as_utf8(jchar* base, int length, char* buf, int buflen);
static char* as_utf8(jbyte* base, int length, char* buf, int buflen);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/CompactStrings/TestMethodNames.java Wed Oct 26 14:36:05 2016 +0200
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import javax.script.*;
+import java.util.function.*;
+
+/*
+ * @test
+ * @bug 8164612
+ * @summary Test method names containing NULL or Latin-1 supplement characters.
+ * @run main/othervm -XX:+CompactStrings TestMethodNames
+ * @run main/othervm -XX:-CompactStrings TestMethodNames
+ */
+public class TestMethodNames {
+ public static void main(String[] args) throws Exception {
+ ScriptEngineManager m = new ScriptEngineManager();
+ ScriptEngine e = m.getEngineByName("nashorn");
+
+ e.eval("({get \"\0\"(){}})[\"\0\"]");
+ e.eval("({get \"\\x80\"(){}})[\"\\x80\"]");
+ e.eval("({get \"\\xff\"(){}})[\"\\xff\"]");
+ }
+}