6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
author mgerdin
Thu, 03 Apr 2014 14:54:42 +0200
changeset 24237 7b210ef8c830
parent 24236 02c0f8873a3c
child 24238 641b2b1b0163
6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
Summary: Added support for target buffer in as_utf8_string(), minor refactoring of as_utf8 and added some internal VM testing
Reviewed-by: coleenp, dsimms, sla, dholmes
Contributed-by: marcus.larsson@oracle.com
hotspot/src/share/vm/classfile/javaClasses.cpp
hotspot/src/share/vm/classfile/javaClasses.hpp
hotspot/src/share/vm/prims/jni.cpp
hotspot/src/share/vm/utilities/utf8.cpp
--- a/hotspot/src/share/vm/classfile/javaClasses.cpp	Thu Apr 24 00:46:17 2014 +0200
+++ b/hotspot/src/share/vm/classfile/javaClasses.cpp	Thu Apr 03 14:54:42 2014 +0200
@@ -421,6 +421,15 @@
   return UNICODE::as_utf8(position, len);
 }
 
+char* java_lang_String::as_utf8_string(oop java_string, int start, int len, char* buf, int buflen) {
+  typeArrayOop value  = java_lang_String::value(java_string);
+  int          offset = java_lang_String::offset(java_string);
+  int          length = java_lang_String::length(java_string);
+  assert(start + len <= length, "just checking");
+  jchar* position = value->char_at_addr(offset + start);
+  return UNICODE::as_utf8(position, len, buf, buflen);
+}
+
 bool java_lang_String::equals(oop java_string, jchar* chars, int len) {
   assert(java_string->klass() == SystemDictionary::String_klass(),
          "must be java_string");
--- a/hotspot/src/share/vm/classfile/javaClasses.hpp	Thu Apr 24 00:46:17 2014 +0200
+++ b/hotspot/src/share/vm/classfile/javaClasses.hpp	Thu Apr 03 14:54:42 2014 +0200
@@ -162,6 +162,7 @@
   static char*  as_utf8_string(oop java_string);
   static char*  as_utf8_string(oop java_string, char* buf, int buflen);
   static char*  as_utf8_string(oop java_string, int start, int len);
+  static char*  as_utf8_string(oop java_string, int start, int len, char* buf, int buflen);
   static char*  as_platform_dependent_str(Handle java_string, TRAPS);
   static jchar* as_unicode_string(oop java_string, int& length, TRAPS);
   // produce an ascii string with all other values quoted using \u####
--- a/hotspot/src/share/vm/prims/jni.cpp	Thu Apr 24 00:46:17 2014 +0200
+++ b/hotspot/src/share/vm/prims/jni.cpp	Thu Apr 03 14:54:42 2014 +0200
@@ -3150,11 +3150,9 @@
   } else {
     //%note jni_7
     if (len > 0) {
-      ResourceMark rm(THREAD);
-      char *utf_region = java_lang_String::as_utf8_string(s, start, len);
-      int utf_len = (int)strlen(utf_region);
-      memcpy(buf, utf_region, utf_len);
-      buf[utf_len] = 0;
+      // Assume the buffer is large enough as the JNI spec. does not require user error checking
+      java_lang_String::as_utf8_string(s, start, len, buf, INT_MAX);
+      // as_utf8_string null-terminates the result string
     } else {
       // JDK null-terminates the buffer even in len is zero
       if (buf != NULL) {
@@ -3880,6 +3878,7 @@
 void TestOldSize_test();
 void TestKlass_test();
 void TestBitMap_test();
+void TestAsUtf8();
 #if INCLUDE_ALL_GCS
 void TestOldFreeSpaceCalculation_test();
 void TestG1BiasedArray_test();
@@ -3907,6 +3906,7 @@
     run_unit_test(TestOldSize_test());
     run_unit_test(TestKlass_test());
     run_unit_test(TestBitMap_test());
+    run_unit_test(TestAsUtf8());
 #if INCLUDE_VM_STRUCTS
     run_unit_test(VMStructs::test());
 #endif
--- a/hotspot/src/share/vm/utilities/utf8.cpp	Thu Apr 24 00:46:17 2014 +0200
+++ b/hotspot/src/share/vm/utilities/utf8.cpp	Thu Apr 03 14:54:42 2014 +0200
@@ -329,23 +329,19 @@
 
 char* UNICODE::as_utf8(jchar* base, int length) {
   int utf8_len = utf8_length(base, length);
-  u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
-  u_char* p = result;
-  for (int index = 0; index < length; index++) {
-    p = utf8_write(p, base[index]);
-  }
-  *p = '\0';
-  assert(p == &result[utf8_len], "length prediction must be correct");
-  return (char*) result;
+  u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
+  char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
+  assert((int) strlen(result) == utf8_len, "length prediction must be correct");
+  return result;
 }
 
 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
   u_char* p = (u_char*)buf;
-  u_char* end = (u_char*)buf + buflen;
   for (int index = 0; index < length; index++) {
     jchar c = base[index];
-    if (p + utf8_size(c) >= end) break;      // string is truncated
-    p = utf8_write(p, base[index]);
+    buflen -= utf8_size(c);
+    if (buflen <= 0) break; // string is truncated
+    p = utf8_write(p, c);
   }
   *p = '\0';
   return buf;
@@ -389,3 +385,29 @@
   }
   *p = '\0';
 }
+
+#ifndef PRODUCT
+void TestAsUtf8() {
+  char res[60];
+  jchar str[20];
+
+  for (int i = 0; i < 20; i++) {
+    str[i] = 0x0800; // char that is 2B in UTF-16 but 3B in UTF-8
+  }
+  str[19] = (jchar)'\0';
+
+  // The resulting string in UTF-8 is 3*19 bytes long, but should be truncated
+  UNICODE::as_utf8(str, 19, res, 10);
+  assert(strlen(res) == 9, "string should be truncated here");
+
+  UNICODE::as_utf8(str, 19, res, 18);
+  assert(strlen(res) == 15, "string should be truncated here");
+
+  UNICODE::as_utf8(str, 19, res, 20);
+  assert(strlen(res) == 18, "string should be truncated here");
+
+  // Test with an "unbounded" buffer
+  UNICODE::as_utf8(str, 19, res, INT_MAX);
+  assert(strlen(res) == 3*19, "string should end here");
+}
+#endif