7013347: allow crypto functions to be called inline to enhance performance
author: never
date: Wed, 01 Feb 2012 16:57:08 -0800
changeset: 11637 030466036615
parent: 11636 3c07b54482a5
child: 11638 68657fd5d7b4
7013347: allow crypto functions to be called inline to enhance performance
Reviewed-by: kvn
hotspot/src/cpu/sparc/vm/assembler_sparc.hpp
hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp
hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp
hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
hotspot/src/share/vm/code/nmethod.cpp
hotspot/src/share/vm/code/nmethod.hpp
hotspot/src/share/vm/memory/gcLocker.cpp
hotspot/src/share/vm/memory/gcLocker.hpp
hotspot/src/share/vm/oops/arrayOop.cpp
hotspot/src/share/vm/oops/methodOop.cpp
hotspot/src/share/vm/oops/methodOop.hpp
hotspot/src/share/vm/prims/nativeLookup.cpp
hotspot/src/share/vm/prims/nativeLookup.hpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/safepoint.cpp
hotspot/src/share/vm/runtime/safepoint.hpp
hotspot/src/share/vm/runtime/sharedRuntime.cpp
hotspot/src/share/vm/runtime/sharedRuntime.hpp
hotspot/src/share/vm/runtime/thread.cpp
hotspot/src/share/vm/runtime/thread.hpp
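At a high level, this change teaches the generated native wrappers about "critical" entry points: when a method provides one, the wrapper skips the JNIEnv*/handle-block setup, passes each primitive array argument as a raw (length, body pointer) pair, and coordinates with GC_locker through SharedRuntime::block_for_jni_critical instead of the usual per-call JNI critical machinery. As a rough, hypothetical illustration (the class and method names below are invented, and the JavaCritical_ prefix lookup is an assumption here, since the nativeLookup.cpp hunk is not part of this excerpt), a crypto native and its critical counterpart might look like:

// Hypothetical sketch only: names are illustrative, and the JavaCritical_
// naming convention for the critical entry point is assumed, not shown above.
#include <jni.h>

// Regular JNI entry point: receives JNIEnv* and array handles and must pin
// the arrays itself before touching their contents.
extern "C" JNIEXPORT void JNICALL
Java_com_example_Crypto_encryptBlock(JNIEnv* env, jclass,
                                     jbyteArray in, jbyteArray out) {
  jbyte* src = (jbyte*) env->GetPrimitiveArrayCritical(in, NULL);
  jbyte* dst = (jbyte*) env->GetPrimitiveArrayCritical(out, NULL);
  // ... transform src into dst ...
  env->ReleasePrimitiveArrayCritical(out, dst, 0);
  env->ReleasePrimitiveArrayCritical(in, src, JNI_ABORT);
}

// Critical entry point: no JNIEnv*, no jclass; each array argument arrives as
// a (jint length, element pointer) pair, matching the T_INT/T_ADDRESS pair the
// wrappers below emit for T_ARRAY arguments, with (0, NULL) for null arrays.
extern "C" JNIEXPORT void JNICALL
JavaCritical_com_example_Crypto_encryptBlock(jint in_len, jbyte* in,
                                             jint out_len, jbyte* out) {
  // ... transform in into out with no JNI bookkeeping on the call path ...
}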
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2134,6 +2134,7 @@
   // address pseudos: make these names unlike instruction names to avoid confusion
   inline intptr_t load_pc_address( Register reg, int bytes_to_skip );
   inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
+  inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
   inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
   inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
   inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
@@ -2249,7 +2250,7 @@
   // this platform we assume byte size
 
   inline void stbool(Register d, const Address& a) { stb(d, a); }
-  inline void ldbool(const Address& a, Register d) { ldsb(a, d); }
+  inline void ldbool(const Address& a, Register d) { ldub(a, d); }
   inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
 
   // klass oop manipulations if compressed
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -692,6 +692,17 @@
 }
 
 
+inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
+  ldub(d, addrlit.low10() + offset, d);
+}
+
+
 inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) {
   assert_not_delayed();
   if (ForceUnreachable) {
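The ldbool/load_bool_contents changes above let the wrapper poll a VM bool flag (later in this changeset, GC_locker's needs_gc byte) straight from memory: on this platform bools are stored as single bytes, and ldub zero-extends the byte so the register holds exactly the stored value, whereas ldsb would sign-extend any byte >= 0x80 into a negative number. A small plain-C++ illustration of the difference (an analogy only, not SPARC code):

#include <cstdio>

int main() {
  unsigned char flag_byte = 0x80;                  // any non-zero byte means "true"
  int as_ldsb = (int)(signed char)  flag_byte;     // sign-extended load: typically -128
  int as_ldub = (int)(unsigned char)flag_byte;     // zero-extended load:           128
  std::printf("ldsb-like=%d ldub-like=%d\n", as_ldsb, as_ldub);
  return 0;
}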
--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -321,6 +321,16 @@
   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 }
 
+static VMRegPair reg64_to_VMRegPair(Register r) {
+  VMRegPair ret;
+  if (wordSize == 8) {
+    ret.set2(r->as_VMReg());
+  } else {
+    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
+  }
+  return ret;
+}
+
 // ---------------------------------------------------------------------------
 // Read the array of BasicTypes from a signature, and compute where the
 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
@@ -1444,6 +1454,25 @@
 }
 
 
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+    } else {
+      // stack to reg
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+  } else {
+    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+  }
+}
+
+
 // An oop arg. Must pass a handle not the oop itself
 static void object_move(MacroAssembler* masm,
                         OopMap* map,
@@ -1748,6 +1777,166 @@
   }
 }
 
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  if (map != NULL) {
+    // Fill in the map
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        if (in_regs[i].first()->is_stack()) {
+          int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+          map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+        } else if (in_regs[i].first()->is_Register()) {
+          map->set_oop(in_regs[i].first());
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+    }
+  }
+
+  // Save or restore double word values
+  int handle_index = 0;
+  for (int i = 0; i < total_in_args; i++) {
+    int slot = handle_index + arg_save_area;
+    int offset = slot * VMRegImpl::stack_slot_size;
+    if (in_sig_bt[i] == T_LONG && in_regs[i].first()->is_Register()) {
+      const Register reg = in_regs[i].first()->as_Register();
+      if (reg->is_global()) {
+        handle_index += 2;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ stx(reg, SP, offset + STACK_BIAS);
+        } else {
+          __ ldx(SP, offset + STACK_BIAS, reg);
+        }
+      }
+    } else if (in_sig_bt[i] == T_DOUBLE && in_regs[i].first()->is_FloatRegister()) {
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ stf(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS);
+      } else {
+        __ ldf(FloatRegisterImpl::D, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister());
+      }
+    }
+  }
+  // Save floats
+  for (int i = 0; i < total_in_args; i++) {
+    int slot = handle_index + arg_save_area;
+    int offset = slot * VMRegImpl::stack_slot_size;
+    if (in_sig_bt[i] == T_FLOAT && in_regs[i].first()->is_FloatRegister()) {
+      handle_index++;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ stf(FloatRegisterImpl::S, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS);
+      } else {
+        __ ldf(FloatRegisterImpl::S, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister());
+      }
+    }
+  }
+
+}
+
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               const int stack_slots,
+                                               const int total_in_args,
+                                               const int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  AddressLiteral sync_state(GC_locker::needs_gc_address());
+  __ load_bool_contents(sync_state, G3_scratch);
+  __ cmp_zero_and_br(Assembler::equal, G3_scratch, cont);
+  __ delayed()->nop();
+
+  // Save down any values that are live in registers and call into the
+  // runtime to halt for a GC
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+
+  __ mov(G2_thread, L7_thread_cache);
+
+  __ set_last_Java_frame(SP, noreg);
+
+  __ block_comment("block_for_jni_critical");
+  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical), relocInfo::runtime_call_type);
+  __ delayed()->mov(L7_thread_cache, O0);
+  oop_maps->add_gc_map( __ offset(), map);
+
+  __ restore_thread(L7_thread_cache); // restore G2_thread
+  __ reset_last_Java_frame();
+
+  // Reload all the register arguments
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Destroy argument registers
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        if (reg->is_global()) {
+          __ mov(G0, reg);
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        __ fneg(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  // Pass the length, ptr pair
+  Label is_null, done;
+  if (reg.first()->is_stack()) {
+    VMRegPair tmp  = reg64_to_VMRegPair(L2);
+    // Load the arg up from the stack
+    move_ptr(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ cmp(reg.first()->as_Register(), G0);
+  __ brx(Assembler::equal, false, Assembler::pt, is_null);
+  __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4);
+  move_ptr(masm, reg64_to_VMRegPair(L4), body_arg);
+  __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4);
+  move32_64(masm, reg64_to_VMRegPair(L4), length_arg);
+  __ ba_short(done);
+  __ bind(is_null);
+  // Pass zeros
+  move_ptr(masm, reg64_to_VMRegPair(G0), body_arg);
+  move32_64(masm, reg64_to_VMRegPair(G0), length_arg);
+  __ bind(done);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
@@ -1762,6 +1951,13 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
 
   // Native nmethod wrappers never take possesion of the oop arguments.
   // So the caller will gc the arguments. The only thing we need an
@@ -1841,22 +2037,70 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  int total_save_slots = 6 * VMRegImpl::slots_per_word;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // These have to be saved and restored across the safepoint
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair  * out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
-  }
-
-  for (int i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
   // Now figure out where the args must be stored and how much stack space
@@ -1866,6 +2110,35 @@
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  assert(reg->is_in(), "don't need to save these"); break;
+          case T_LONG: if (reg->is_global()) double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+  }
+
   // Compute framesize for the wrapper.  We need to handlize all oops in
   // registers. We must create space for them here that is disjoint from
   // the windowed save area because we have no control over when we might
@@ -1885,12 +2158,11 @@
 
   // Now the space for the inbound oop handle area
 
-  int oop_handle_offset = stack_slots;
-  stack_slots += 6*VMRegImpl::slots_per_word;
+  int oop_handle_offset = round_to(stack_slots, 2);
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
-  int oop_temp_slot_offset = 0;
   int klass_slot_offset = 0;
   int klass_offset = -1;
   int lock_slot_offset = 0;
@@ -1954,6 +2226,10 @@
 
   __ verify_thread();
 
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots,  total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1982,7 +2258,6 @@
   // caller.
   //
   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
-  int c_arg = total_c_args - 1;
   // Record sp-based slot for receiver on stack for non-static methods
   int receiver_offset = -1;
 
@@ -2002,7 +2277,7 @@
 
 #endif /* ASSERT */
 
-  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
+  for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) {
 
 #ifdef ASSERT
     if (in_regs[i].first()->is_Register()) {
@@ -2019,7 +2294,13 @@
 
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]);
+          c_arg--;
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -2029,7 +2310,7 @@
 
       case T_FLOAT:
         float_move(masm, in_regs[i], out_regs[c_arg]);
-          break;
+        break;
 
       case T_DOUBLE:
         assert( i + 1 < total_in_args &&
@@ -2051,7 +2332,7 @@
 
   // Pre-load a static method's oop into O1.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
     __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
 
     // Now handlize the static class mirror in O1.  It's known not-null.
@@ -2064,13 +2345,13 @@
   const Register L6_handle = L6;
 
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
     __ mov(O1, L6_handle);
   }
 
   // We have all of the arguments setup at this point. We MUST NOT touch any Oregs
   // except O6/O7. So if we must call out we must push a new frame. We immediately
   // push a new frame and flush the windows.
-
 #ifdef _LP64
   intptr_t thepc = (intptr_t) __ pc();
   {
@@ -2202,32 +2483,28 @@
   }
 
   // get JNIEnv* which is first argument to native
-
-  __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
+  if (!is_critical_native) {
+    __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
+  }
 
   // Use that pc we placed in O7 a while back as the current frame anchor
-
   __ set_last_Java_frame(SP, O7);
 
+  // We flushed the windows ages ago; now mark them as flushed before transitioning.
+  __ set(JavaFrameAnchor::flushed, G3_scratch);
+  __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
+
   // Transition from _thread_in_Java to _thread_in_native.
   __ set(_thread_in_native, G3_scratch);
-  __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
-
-  // We flushed the windows ages ago now mark them as flushed
-
-  // mark windows as flushed
-  __ set(JavaFrameAnchor::flushed, G3_scratch);
-
-  Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
 
 #ifdef _LP64
-  AddressLiteral dest(method->native_function());
+  AddressLiteral dest(native_func);
   __ relocate(relocInfo::runtime_call_type);
   __ jumpl_to(dest, O7, O7);
 #else
-  __ call(method->native_function(), relocInfo::runtime_call_type);
+  __ call(native_func, relocInfo::runtime_call_type);
 #endif
-  __ delayed()->st(G3_scratch, flags);
+  __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
 
   __ restore_thread(L7_thread_cache); // restore G2_thread
 
@@ -2259,6 +2536,7 @@
     ShouldNotReachHere();
   }
 
+  Label after_transition;
   // must we block?
 
   // Block, if necessary, before resuming in _thread_in_Java state.
@@ -2303,22 +2581,34 @@
     // a distinct one for this pc
     //
     save_native_result(masm, ret_type, stack_slots);
-    __ call_VM_leaf(L7_thread_cache,
-                    CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
-                    G2_thread);
+    if (!is_critical_native) {
+      __ call_VM_leaf(L7_thread_cache,
+                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
+                      G2_thread);
+    } else {
+      __ call_VM_leaf(L7_thread_cache,
+                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition),
+                      G2_thread);
+    }
 
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ ba(after_transition);
+      __ delayed()->nop();
+    }
+
     __ bind(no_block);
   }
 
   // thread state is thread_in_native_trans. Any safepoint blocking has already
   // happened so we can now change state to _thread_in_Java.
-
-
   __ set(_thread_in_Java, G3_scratch);
   __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
-
+  __ bind(after_transition);
 
   Label no_reguard;
   __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
@@ -2416,12 +2706,14 @@
       __ verify_oop(I0);
   }
 
-  // reset handle block
-  __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
-  __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
-
-  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
-  check_forward_pending_exception(masm, G3_scratch);
+  if (!is_critical_native) {
+    // reset handle block
+    __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
+    __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
+
+    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
+    check_forward_pending_exception(masm, G3_scratch);
+  }
 
 
   // Return
@@ -2450,6 +2742,10 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_offset),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
   return nm;
 
 }
@@ -2473,17 +2769,6 @@
 static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
 static bool offsets_initialized = false;
 
-static VMRegPair reg64_to_VMRegPair(Register r) {
-  VMRegPair ret;
-  if (wordSize == 8) {
-    ret.set2(r->as_VMReg());
-  } else {
-    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
-  }
-  return ret;
-}
-
-
 nmethod *SharedRuntime::generate_dtrace_nmethod(
     MacroAssembler *masm, methodHandle method) {
 
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1091,12 +1091,238 @@
   }
 }
 
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  int handle_index = 0;
+  // Save down double word first
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+      } else {
+        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+      }
+    }
+    if (in_regs[i].first()->is_Register() && in_sig_bt[i] == T_LONG) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movl(Address(rsp, offset), in_regs[i].first()->as_Register());
+        if (in_regs[i].second()->is_Register()) {
+          __ movl(Address(rsp, offset + 4), in_regs[i].second()->as_Register());
+        }
+      } else {
+        __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
+        if (in_regs[i].second()->is_Register()) {
+          __ movl(in_regs[i].second()->as_Register(), Address(rsp, offset + 4));
+        }
+      }
+    }
+  }
+  // Save or restore single word registers
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_Register()) {
+      int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      assert(handle_index <= stack_slots, "overflow");
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        map->set_oop(VMRegImpl::stack2reg(slot));
+      }
+
+      // Value is in an input register, so we must flush it to the stack
+      const Register reg = in_regs[i].first()->as_Register();
+      switch (in_sig_bt[i]) {
+        case T_ARRAY:
+          if (map != NULL) {
+            __ movptr(Address(rsp, offset), reg);
+          } else {
+            __ movptr(reg, Address(rsp, offset));
+          }
+          break;
+        case T_BOOLEAN:
+        case T_CHAR:
+        case T_BYTE:
+        case T_SHORT:
+        case T_INT:
+          if (map != NULL) {
+            __ movl(Address(rsp, offset), reg);
+          } else {
+            __ movl(reg, Address(rsp, offset));
+          }
+          break;
+        case T_OBJECT:
+        default: ShouldNotReachHere();
+      }
+    } else if (in_regs[i].first()->is_XMMRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+        int offset = slot * VMRegImpl::stack_slot_size;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+        } else {
+          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+        }
+      }
+    } else if (in_regs[i].first()->is_stack()) {
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+      }
+    }
+  }
+}
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               Register thread,
+                                               int stack_slots,
+                                               int total_c_args,
+                                               int total_in_args,
+                                               int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
+  __ jcc(Assembler::equal, cont);
+
+  // Save down any incoming oops and call into the runtime to halt for a GC
+
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+
+  address the_pc = __ pc();
+  oop_maps->add_gc_map( __ offset(), map);
+  __ set_last_Java_frame(thread, rsp, noreg, the_pc);
+
+  __ block_comment("block_for_jni_critical");
+  __ push(thread);
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
+  __ increment(rsp, wordSize);
+
+  __ get_thread(thread);
+  __ reset_last_Java_frame(thread, false, true);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Destroy argument registers
+    for (int i = 0; i < total_in_args - 1; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        __ xorptr(reg, reg);
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      } else if (in_regs[i].first()->is_stack()) {
+        // Nothing to do
+      } else {
+        ShouldNotReachHere();
+      }
+      if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
+        i++;
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  Register tmp_reg = rax;
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair
+  Label is_null, done;
+  VMRegPair tmp(tmp_reg->as_VMReg());
+  if (reg.first()->is_stack()) {
+    // Load the arg up from the stack
+    simple_move32(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
+  __ jccb(Assembler::equal, is_null);
+  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  simple_move32(masm, tmp, body_arg);
+  // load the length relative to the body.
+  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
+                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  simple_move32(masm, tmp, length_arg);
+  __ jmpb(done);
+  __ bind(is_null);
+  // Pass zeros
+  __ xorptr(tmp_reg, tmp_reg);
+  simple_move32(masm, tmp, body_arg);
+  simple_move32(masm, tmp, length_arg);
+  __ bind(done);
+}
+
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
 // convention (handlizes oops, etc), transitions to native, makes the call,
 // returns to java state (possibly blocking), unhandlizes any result and
 // returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped like the tear down of the JNI handle
+// block and the check for pending exceptions, since it's impossible for them
+// to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into the JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                 methodHandle method,
                                                 int compile_id,
@@ -1105,6 +1331,13 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
 
   // An OopMap for lock (and class if static)
   OopMapSet *oop_maps = new OopMapSet();
@@ -1115,30 +1348,72 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
-  int i;
-  for (i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
-  }
-
-
   // Now figure out where the args must be stored and how much stack space
-  // they require (neglecting out_preserve_stack_slots but space for storing
-  // the 1st six register arguments). It's weird see int_stk_helper.
-  //
+  // they require.
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
@@ -1151,9 +1426,44 @@
   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 
   // Now the space for the inbound oop handle area
+  int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  single_slots++; break;
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+    // align the save area
+    if (double_slots != 0) {
+      stack_slots = round_to(stack_slots, 2);
+    }
+  }
 
   int oop_handle_offset = stack_slots;
-  stack_slots += 2*VMRegImpl::slots_per_word;
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
@@ -1161,7 +1471,6 @@
   int klass_offset = -1;
   int lock_slot_offset = 0;
   bool is_static = false;
-  int oop_temp_slot_offset = 0;
 
   if (method->is_static()) {
     klass_slot_offset = stack_slots;
@@ -1221,7 +1530,7 @@
   // First thing make an ic check to see if we should even be here
 
   // We are free to use all registers as temps without saving them and
-  // restoring them except rbp,. rbp, is the only callee save register
+  // restoring them except rbp. rbp is the only callee save register
   // as far as the interpreter and the compiler(s) are concerned.
 
 
@@ -1230,7 +1539,6 @@
   Label hit;
   Label exception_pending;
 
-
   __ verify_oop(receiver);
   __ cmpptr(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
   __ jcc(Assembler::equal, hit);
@@ -1292,11 +1600,10 @@
 
   // Generate a new frame for the wrapper.
   __ enter();
-  // -2 because return address is already present and so is saved rbp,
+  // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
-  // Frame is now completed as far a size and linkage.
-
+  // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;
 
   // Calculate the difference between rsp and rbp,. We need to know it
@@ -1319,7 +1626,6 @@
   // Compute the rbp, offset for any slots used after the jni call
 
   int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
-  int oop_temp_slot_rbp_offset = (oop_temp_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
 
   // We use rdi as a thread pointer because it is callee save and
   // if we load it once it is usable thru the entire wrapper
@@ -1332,6 +1638,10 @@
 
   __ get_thread(thread);
 
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1353,7 +1663,7 @@
   // vectors we have in our possession. We simply walk the java vector to
   // get the source locations and the c vector to get the destinations.
 
-  int c_arg = method->is_static() ? 2 : 1 ;
+  int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
 
   // Record rsp-based slot for receiver on stack for non-static methods
   int receiver_offset = -1;
@@ -1373,10 +1683,16 @@
   // Are free to temporaries if we have to do  stack to steck moves.
   // All inbound args are referenced based on rbp, and all outbound args via rsp.
 
-  for (i = 0; i < total_in_args ; i++, c_arg++ ) {
+  for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -1408,7 +1724,7 @@
 
   // Pre-load a static method's oop into rsi.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
 
     //  load opp into a register
     __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
@@ -1463,6 +1779,7 @@
 
   // Lock a synchronized method
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
 
 
     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
@@ -1529,14 +1846,15 @@
 
 
   // get JNIEnv* which is first argument to native
-
-  __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
-  __ movptr(Address(rsp, 0), rdx);
+  if (!is_critical_native) {
+    __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
+    __ movptr(Address(rsp, 0), rdx);
+  }
 
   // Now set thread in native
   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
 
-  __ call(RuntimeAddress(method->native_function()));
+  __ call(RuntimeAddress(native_func));
 
   // WARNING - on Windows Java Natives use pascal calling convention and pop the
   // arguments off of the stack. We could just re-adjust the stack pointer here
@@ -1591,6 +1909,8 @@
     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
   }
 
+  Label after_transition;
+
   // check for safepoint operation in progress and/or pending suspend requests
   { Label Continue;
 
@@ -1611,17 +1931,29 @@
     //
     save_native_result(masm, ret_type, stack_slots);
     __ push(thread);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
-                                            JavaThread::check_special_condition_for_native_trans)));
+    if (!is_critical_native) {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                              JavaThread::check_special_condition_for_native_trans)));
+    } else {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                              JavaThread::check_special_condition_for_native_trans_and_transition)));
+    }
     __ increment(rsp, wordSize);
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
 
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ jmpb(after_transition);
+    }
+
     __ bind(Continue);
   }
 
   // change thread state
   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
+  __ bind(after_transition);
 
   Label reguard;
   Label reguard_done;
@@ -1710,15 +2042,15 @@
       __ verify_oop(rax);
   }
 
-  // reset handle block
-  __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
-
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
-
-  // Any exception pending?
-  __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
-  __ jcc(Assembler::notEqual, exception_pending);
-
+  if (!is_critical_native) {
+    // reset handle block
+    __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
+    __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
+
+    // Any exception pending?
+    __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notEqual, exception_pending);
+  }
 
   // no exception, we're almost done
 
@@ -1829,16 +2161,18 @@
 
   // BEGIN EXCEPTION PROCESSING
 
-  // Forward  the exception
-  __ bind(exception_pending);
-
-  // remove possible return value from FPU register stack
-  __ empty_FPU_stack();
-
-  // pop our frame
-  __ leave();
-  // and forward the exception
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  if (!is_critical_native) {
+    // Forward  the exception
+    __ bind(exception_pending);
+
+    // remove possible return value from FPU register stack
+    __ empty_FPU_stack();
+
+    // pop our frame
+    __ leave();
+    // and forward the exception
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
 
   __ flush();
 
@@ -1851,6 +2185,11 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
   return nm;
 
 }
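The block comment near the top of this x86_32 wrapper spells out the overall flow; the subtle part is the two-sided handshake with GC_locker: the prologue refuses to enter a new critical region while a GC is pending (check_needs_gc_for_critical_native -> SharedRuntime::block_for_jni_critical), and on the way back the critical path uses check_special_condition_for_native_trans_and_transition so a GC that was suppressed while the thread was inside the region can proceed. The following is a toy model of that handshake in ordinary C++ threads; it is an analogy under stated assumptions, not VM code, and the real bookkeeping lives in gcLocker.cpp/safepoint.cpp elsewhere in this changeset.

#include <condition_variable>
#include <mutex>

namespace toy {

std::mutex              mtx;
std::condition_variable cv;
bool needs_gc = false;    // analogue of the GC_locker::needs_gc() flag polled above
int  critical_depth = 0;  // threads currently inside a JNI critical region

void run_deferred_gc() { /* placeholder for the GC that was held off */ }

// Prologue analogue: do not start a new critical region while a GC is pending
// (the role played by SharedRuntime::block_for_jni_critical).
void enter_critical() {
  std::unique_lock<std::mutex> g(mtx);
  cv.wait(g, [] { return !needs_gc; });
  ++critical_depth;
}

// Epilogue analogue: the last thread out lets the deferred GC run and wakes
// any threads blocked in enter_critical().
void leave_critical() {
  std::unique_lock<std::mutex> g(mtx);
  if (--critical_depth == 0 && needs_gc) {
    run_deferred_gc();
    needs_gc = false;
    cv.notify_all();
  }
}

// GC side: if any thread is inside a critical region, set the flag and defer.
void request_gc() {
  std::unique_lock<std::mutex> g(mtx);
  if (critical_depth > 0) {
    needs_gc = true;
  } else {
    run_deferred_gc();
  }
}

}  // namespace toy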
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -938,6 +938,25 @@
   }
 }
 
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ movq(rax, Address(rbp, reg2offset_in(src.first())));
+      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
+    } else {
+      // stack to reg
+      __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
+  } else {
+    if (dst.first() != src.first()) {
+      __ movq(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
 
 // An oop arg. Must pass a handle not the oop itself
 static void object_move(MacroAssembler* masm,
@@ -1152,6 +1171,203 @@
     }
 }
 
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  int handle_index = 0;
+  // Save down double word first
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+      } else {
+        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+      }
+    }
+    if (in_regs[i].first()->is_Register() &&
+        (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
+        if (in_sig_bt[i] == T_ARRAY) {
+          map->set_oop(VMRegImpl::stack2reg(slot));
+        }
+      } else {
+        __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
+      }
+    }
+  }
+  // Save or restore single word registers
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_Register()) {
+      int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      assert(handle_index <= stack_slots, "overflow");
+
+      // Value is in an input register, so we must flush it to the stack
+      const Register reg = in_regs[i].first()->as_Register();
+      switch (in_sig_bt[i]) {
+        case T_BOOLEAN:
+        case T_CHAR:
+        case T_BYTE:
+        case T_SHORT:
+        case T_INT:
+          if (map != NULL) {
+            __ movl(Address(rsp, offset), reg);
+          } else {
+            __ movl(reg, Address(rsp, offset));
+          }
+          break;
+        case T_ARRAY:
+        case T_LONG:
+          // handled above
+          break;
+        case T_OBJECT:
+        default: ShouldNotReachHere();
+      }
+    } else if (in_regs[i].first()->is_XMMRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+        int offset = slot * VMRegImpl::stack_slot_size;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+        } else {
+          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+        }
+      }
+    } else if (in_regs[i].first()->is_stack()) {
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+      }
+    }
+  }
+}
+
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               int stack_slots,
+                                               int total_c_args,
+                                               int total_in_args,
+                                               int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
+  __ jcc(Assembler::equal, cont);
+
+  // Save down any incoming oops and call into the runtime to halt for a GC
+
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+
+  address the_pc = __ pc();
+  oop_maps->add_gc_map( __ offset(), map);
+  __ set_last_Java_frame(rsp, noreg, the_pc);
+
+  __ block_comment("block_for_jni_critical");
+  __ movptr(c_rarg0, r15_thread);
+  __ mov(r12, rsp); // remember sp
+  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
+  __ andptr(rsp, -16); // align stack as required by ABI
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
+  __ mov(rsp, r12); // restore sp
+  __ reinit_heapbase();
+
+  __ reset_last_Java_frame(false, true);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Destroy argument registers
+    for (int i = 0; i < total_in_args - 1; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        __ xorptr(reg, reg);
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      } else if (in_regs[i].first()->is_stack()) {
+        // Nothing to do
+      } else {
+        ShouldNotReachHere();
+      }
+      if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
+        i++;
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  Register tmp_reg = rax;
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair
+  Label is_null, done;
+  VMRegPair tmp;
+  tmp.set_ptr(tmp_reg->as_VMReg());
+  if (reg.first()->is_stack()) {
+    // Load the arg up from the stack
+    move_ptr(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
+  __ jccb(Assembler::equal, is_null);
+  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  move_ptr(masm, tmp, body_arg);
+  // load the length relative to the body.
+  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
+                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  move32_64(masm, tmp, length_arg);
+  __ jmpb(done);
+  __ bind(is_null);
+  // Pass zeros
+  __ xorptr(tmp_reg, tmp_reg);
+  move_ptr(masm, tmp, body_arg);
+  move32_64(masm, tmp, length_arg);
+  __ bind(done);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
@@ -1166,10 +1382,14 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
-  // Native nmethod wrappers never take possesion of the oop arguments.
-  // So the caller will gc the arguments. The only thing we need an
-  // oopMap for is if the call is static
-  //
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
+
   // An OopMap for lock (and class if static)
   OopMapSet *oop_maps = new OopMapSet();
   intptr_t start = (intptr_t)__ pc();
@@ -1180,27 +1400,72 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
-  }
-
-  for (int i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
   // Now figure out where the args must be stored and how much stack space
   // they require.
-  //
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
@@ -1213,13 +1478,47 @@
   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 
   // Now the space for the inbound oop handle area
+  int total_save_slots = 6 * VMRegImpl::slots_per_word;  // 6 arguments passed in registers
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  single_slots++; break;
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+    // align the save area
+    if (double_slots != 0) {
+      stack_slots = round_to(stack_slots, 2);
+    }
+  }
 
   int oop_handle_offset = stack_slots;
-  stack_slots += 6*VMRegImpl::slots_per_word;
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
-  int oop_temp_slot_offset = 0;
   int klass_slot_offset = 0;
   int klass_offset = -1;
   int lock_slot_offset = 0;
@@ -1272,7 +1571,6 @@
 
   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 
-
   // First thing make an ic check to see if we should even be here
 
   // We are free to use all registers as temps without saving them and
@@ -1283,22 +1581,22 @@
   const Register ic_reg = rax;
   const Register receiver = j_rarg0;
 
-  Label ok;
+  Label hit;
   Label exception_pending;
 
   assert_different_registers(ic_reg, receiver, rscratch1);
   __ verify_oop(receiver);
   __ load_klass(rscratch1, receiver);
   __ cmpq(ic_reg, rscratch1);
-  __ jcc(Assembler::equal, ok);
+  __ jcc(Assembler::equal, hit);
 
   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
-  __ bind(ok);
-
   // Verified entry point must be aligned
   __ align(8);
 
+  __ bind(hit);
+
   int vep_offset = ((intptr_t)__ pc()) - start;
 
   // The instruction at the verified entry point must be 5 bytes or longer
@@ -1319,9 +1617,8 @@
   // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
-    // Frame is now completed as far as size and linkage.
-
-    int frame_complete = ((intptr_t)__ pc()) - start;
+  // Frame is now completed as far as size and linkage.
+  int frame_complete = ((intptr_t)__ pc()) - start;
 
 #ifdef ASSERT
     {
@@ -1341,7 +1638,10 @@
 
   const Register oop_handle_reg = r14;
 
-
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1390,9 +1690,36 @@
 
 #endif /* ASSERT */
 
-
+  if (is_critical_native) {
+  // The mapping of Java and C arguments passed in registers is
+  // rotated by one, which helps when passing arguments to regular
+  // Java methods, but for critical natives that creates a cycle which
+    // can cause arguments to be killed before they are used.  Break
+    // the cycle by moving the first argument into a temporary
+    // register.
+    for (int i = 0; i < total_c_args; i++) {
+      if (in_regs[i].first()->is_Register() &&
+          in_regs[i].first()->as_Register() == rdi) {
+        __ mov(rbx, rdi);
+        in_regs[i].set1(rbx->as_VMReg());
+      }
+    }
+  }
+
+  // This may iterate in two different directions depending on the
+  // kind of native it is.  The reason is that for regular JNI natives
+  // the incoming and outgoing registers are offset upwards and for
+  // critical natives they are offset down.
   int c_arg = total_c_args - 1;
-  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
+  int stride = -1;
+  int init = total_in_args - 1;
+  if (is_critical_native) {
+    // stride forwards
+    c_arg = 0;
+    stride = 1;
+    init = 0;
+  }
+  for (int i = init, count = 0; count < total_in_args; i += stride, c_arg += stride, count++ ) {
 #ifdef ASSERT
     if (in_regs[i].first()->is_Register()) {
       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
@@ -1407,7 +1734,20 @@
 #endif /* ASSERT */
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+#ifdef ASSERT
+          if (out_regs[c_arg].first()->is_Register()) {
+            reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+          } else if (out_regs[c_arg].first()->is_XMMRegister()) {
+            freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
+          }
+#endif
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -1443,7 +1783,7 @@
 
   // Pre-load a static method's oop into r14.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
 
     //  load oop into a register
     __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
@@ -1509,6 +1849,7 @@
   Label lock_done;
 
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
 
 
     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
@@ -1572,13 +1913,14 @@
 
 
   // get JNIEnv* which is first argument to native
-
-  __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
+  if (!is_critical_native) {
+    __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
+  }
 
   // Now set thread in native
   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
 
-  __ call(RuntimeAddress(method->native_function()));
+  __ call(RuntimeAddress(native_func));
 
     // Either restore the MXCSR register after returning from the JNI Call
     // or verify that it wasn't changed.
@@ -1634,6 +1976,7 @@
     }
   }
 
+  Label after_transition;
 
   // check for safepoint operation in progress and/or pending suspend requests
   {
@@ -1659,16 +2002,28 @@
     __ mov(r12, rsp); // remember sp
     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
     __ andptr(rsp, -16); // align stack as required by ABI
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+    if (!is_critical_native) {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+    } else {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
+    }
     __ mov(rsp, r12); // restore sp
     __ reinit_heapbase();
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ jmpb(after_transition);
+    }
+
     __ bind(Continue);
   }
 
   // change thread state
   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
+  __ bind(after_transition);
 
   Label reguard;
   Label reguard_done;
@@ -1746,17 +2101,21 @@
       __ verify_oop(rax);
   }
 
-  // reset handle block
-  __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  if (!is_critical_native) {
+    // reset handle block
+    __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
+    __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  }
 
   // pop our frame
 
   __ leave();
 
-  // Any exception pending?
-  __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
-  __ jcc(Assembler::notEqual, exception_pending);
+  if (!is_critical_native) {
+    // Any exception pending?
+    __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notEqual, exception_pending);
+  }
 
   // Return
 
@@ -1764,12 +2123,13 @@
 
   // Unexpected paths are out of line and go here
 
-  // forward the exception
-  __ bind(exception_pending);
-
-  // and forward the exception
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-
+  if (!is_critical_native) {
+    // forward the exception
+    __ bind(exception_pending);
+
+    // and forward the exception
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
 
   // Slow path locking & unlocking
   if (method->is_synchronized()) {
@@ -1876,6 +2236,11 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
   return nm;
 
 }
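
For readers of the wrapper code above, the net effect on the native side can be
illustrated with a hypothetical static native method (class and method names below
are invented for illustration; only the calling convention is taken from the wrapper).
A critical native receives no JNIEnv* and no jclass, and every array argument is
expanded into a (length, body pointer) pair, with 0/NULL passed when the array is null:

    #include <jni.h>

    // Standard JNI binding for: static native void encrypt(byte[] in, byte[] out)
    // declared on a hypothetical class com.example.Crypto
    JNIEXPORT void JNICALL
    Java_com_example_Crypto_encrypt(JNIEnv* env, jclass cls,
                                    jbyteArray in, jbyteArray out);

    // Critical variant the wrapper can dispatch to when CriticalJNINatives is on:
    // no JNIEnv*/jclass, arrays arrive as (jint length, jbyte* body), body may be NULL
    JNIEXPORT void JNICALL
    JavaCritical_com_example_Crypto_encrypt(jint in_len, jbyte* in,
                                            jint out_len, jbyte* out);
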
--- a/hotspot/src/share/vm/code/nmethod.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -462,6 +462,7 @@
   _speculatively_disconnected = 0;
   _has_unsafe_access          = 0;
   _has_method_handle_invokes  = 0;
+  _lazy_critical_native       = 0;
   _marked_for_deoptimization  = 0;
   _lock_count                 = 0;
   _stack_traversal_mark       = 0;
--- a/hotspot/src/share/vm/code/nmethod.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -175,6 +175,7 @@
   // set during construction
   unsigned int _has_unsafe_access:1;         // May fault due to unsafe access.
   unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
+  unsigned int _lazy_critical_native:1;      // Lazy JNI critical native
 
   // Protected by Patching_lock
   unsigned char _state;                      // {alive, not_entrant, zombie, unloaded}
@@ -430,7 +431,10 @@
   void  set_has_method_handle_invokes(bool z)     { _has_method_handle_invokes = z; }
 
   bool  is_speculatively_disconnected() const     { return _speculatively_disconnected; }
-  void  set_speculatively_disconnected(bool z)     { _speculatively_disconnected = z; }
+  void  set_speculatively_disconnected(bool z)    { _speculatively_disconnected = z; }
+
+  bool  is_lazy_critical_native() const           { return _lazy_critical_native; }
+  void  set_lazy_critical_native(bool z)          { _lazy_critical_native = z; }
 
   int   comp_level() const                        { return _comp_level; }
 
--- a/hotspot/src/share/vm/memory/gcLocker.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/memory/gcLocker.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -69,7 +69,7 @@
     _needs_gc = true;
     if (PrintJNIGCStalls && PrintGCDetails) {
       ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
-      _wait_begin = tty->time_stamp().milliseconds();
+      _wait_begin = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
       gclog_or_tty->print_cr(INT64_FORMAT ": Setting _needs_gc. Thread \"%s\" %d locked.",
                              _wait_begin, Thread::current()->name(), _jni_lock_count);
     }
@@ -86,7 +86,7 @@
     if (PrintJNIGCStalls && PrintGCDetails) {
       ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
       gclog_or_tty->print_cr(INT64_FORMAT ": Allocation failed. Thread \"%s\" is stalled by JNI critical section, %d locked.",
-                             tty->time_stamp().milliseconds() - _wait_begin, Thread::current()->name(), _jni_lock_count);
+                             (os::javaTimeNanos() / NANOSECS_PER_MILLISEC) - _wait_begin, Thread::current()->name(), _jni_lock_count);
     }
   }
 
@@ -132,7 +132,7 @@
         if (PrintJNIGCStalls && PrintGCDetails) {
           ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
           gclog_or_tty->print_cr(INT64_FORMAT ": Thread \"%s\" is performing GC after exiting critical section, %d locked",
-                                 tty->time_stamp().milliseconds() - _wait_begin, Thread::current()->name(), _jni_lock_count);
+                                 (os::javaTimeNanos() / NANOSECS_PER_MILLISEC) - _wait_begin, Thread::current()->name(), _jni_lock_count);
         }
         Universe::heap()->collect(GCCause::_gc_locker);
       }
--- a/hotspot/src/share/vm/memory/gcLocker.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/memory/gcLocker.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -164,6 +164,8 @@
   // is set, the slow path is always taken, till _needs_gc is cleared.
   static void lock_critical(JavaThread* thread);
   static void unlock_critical(JavaThread* thread);
+
+  static address needs_gc_address() { return (address) &_needs_gc; }
 };
 
 
--- a/hotspot/src/share/vm/oops/arrayOop.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/oops/arrayOop.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #ifndef PRODUCT
 
 #include "oops/arrayOop.hpp"
+#include "oops/oop.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
 
 bool arrayOopDesc::check_max_length_overflow(BasicType type) {
--- a/hotspot/src/share/vm/oops/methodOop.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/oops/methodOop.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -596,6 +596,11 @@
   clear_code();
 }
 
+address methodOopDesc::critical_native_function() {
+  methodHandle mh(this);
+  return NativeLookup::lookup_critical_entry(mh);
+}
+
 
 void methodOopDesc::set_signature_handler(address handler) {
   address* signature_handler =  signature_handler_addr();
--- a/hotspot/src/share/vm/oops/methodOop.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/oops/methodOop.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -403,6 +403,8 @@
     native_bind_event_is_interesting = true
   };
   address native_function() const                { return *(native_function_addr()); }
+  address critical_native_function();
+
   // Must specify a real function (not NULL).
   // Use clear_native_function() to unregister.
   void set_native_function(address function, bool post_event_flag);
--- a/hotspot/src/share/vm/prims/nativeLookup.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/prims/nativeLookup.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -91,6 +91,19 @@
 }
 
 
+char* NativeLookup::critical_jni_name(methodHandle method) {
+  stringStream st;
+  // Prefix
+  st.print("JavaCritical_");
+  // Klass name
+  mangle_name_on(&st, method->klass_name());
+  st.print("_");
+  // Method name
+  mangle_name_on(&st, method->name());
+  return st.as_string();
+}
+
+
 char* NativeLookup::long_jni_name(methodHandle method) {
   // Signature ignores the wrapping parentheses and the trailing return type
   stringStream st;
@@ -193,6 +206,34 @@
 }
 
 
+address NativeLookup::lookup_critical_style(methodHandle method, char* pure_name, const char* long_name, int args_size, bool os_style) {
+  if (!method->has_native_function()) {
+    return NULL;
+  }
+
+  address current_entry = method->native_function();
+
+  char dll_name[JVM_MAXPATHLEN];
+  int offset;
+  if (os::dll_address_to_library_name(current_entry, dll_name, sizeof(dll_name), &offset)) {
+    char ebuf[32];
+    void* dll = os::dll_load(dll_name, ebuf, sizeof(ebuf));
+    if (dll != NULL) {
+      // Compute complete JNI name for style
+      stringStream st;
+      if (os_style) os::print_jni_name_prefix_on(&st, args_size);
+      st.print_raw(pure_name);
+      st.print_raw(long_name);
+      if (os_style) os::print_jni_name_suffix_on(&st, args_size);
+      char* jni_name = st.as_string();
+      return (address)os::dll_lookup(dll, jni_name);
+    }
+  }
+
+  return NULL;
+}
+
+
 // Check all the formats of native implementation name to see if there is one
 // for the specified method.
 address NativeLookup::lookup_entry(methodHandle method, bool& in_base_library, TRAPS) {
@@ -228,6 +269,58 @@
   return entry; // NULL indicates not found
 }
 
+// Check all the formats of native implementation name to see if there is one
+// for the specified method.
+address NativeLookup::lookup_critical_entry(methodHandle method) {
+  if (!CriticalJNINatives) return NULL;
+
+  if (method->is_synchronized() ||
+      !method->is_static()) {
+    // Only static non-synchronized methods are allowed
+    return NULL;
+  }
+
+  ResourceMark rm;
+  address entry = NULL;
+
+  Symbol* signature = method->signature();
+  for (int end = 0; end < signature->utf8_length(); end++) {
+    if (signature->byte_at(end) == 'L') {
+      // Don't allow object types
+      return NULL;
+    }
+  }
+
+  // Compute critical name
+  char* critical_name = critical_jni_name(method);
+
+  // Compute argument size
+  int args_size = 1                             // JNIEnv
+                + (method->is_static() ? 1 : 0) // class for static methods
+                + method->size_of_parameters(); // actual parameters
+
+
+  // 1) Try JNI short style
+  entry = lookup_critical_style(method, critical_name, "",        args_size, true);
+  if (entry != NULL) return entry;
+
+  // Compute long name
+  char* long_name = long_jni_name(method);
+
+  // 2) Try JNI long style
+  entry = lookup_critical_style(method, critical_name, long_name, args_size, true);
+  if (entry != NULL) return entry;
+
+  // 3) Try JNI short style without os prefix/suffix
+  entry = lookup_critical_style(method, critical_name, "",        args_size, false);
+  if (entry != NULL) return entry;
+
+  // 4) Try JNI long style without os prefix/suffix
+  entry = lookup_critical_style(method, critical_name, long_name, args_size, false);
+
+  return entry; // NULL indicates not found
+}
+
 // Check if there are any JVM TI prefixes which have been applied to the native method name.
 // If any are found, remove them before attempting the look up of the
 // native implementation again.
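
For concreteness, a sketch of the symbol names that lookup_critical_entry probes,
using the same invented com.example.Crypto.encrypt([B[B)V example as above (the
OS-specific prefix/suffix, if any, is platform-dependent, and the long-style
signature mangling follows the usual JNI rules):

    // 1) and 3) short style: "JavaCritical_" + mangled class name + "_" + method name
    //    JavaCritical_com_example_Crypto_encrypt
    // 2) and 4) long style: short style + "__" + mangled argument signature
    //    JavaCritical_com_example_Crypto_encrypt___3B_3B
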
--- a/hotspot/src/share/vm/prims/nativeLookup.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/prims/nativeLookup.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,15 +36,18 @@
   // JNI name computation
   static char* pure_jni_name(methodHandle method);
   static char* long_jni_name(methodHandle method);
+  static char* critical_jni_name(methodHandle method);
 
   // Style specific lookup
   static address lookup_style(methodHandle method, char* pure_name, const char* long_name, int args_size, bool os_style, bool& in_base_library, TRAPS);
+  static address lookup_critical_style(methodHandle method, char* pure_name, const char* long_name, int args_size, bool os_style);
   static address lookup_base (methodHandle method, bool& in_base_library, TRAPS);
   static address lookup_entry(methodHandle method, bool& in_base_library, TRAPS);
   static address lookup_entry_prefixed(methodHandle method, bool& in_base_library, TRAPS);
  public:
   // Lookup native function. May throw UnsatisfiedLinkError.
   static address lookup(methodHandle method, bool& in_base_library, TRAPS);
+  static address lookup_critical_entry(methodHandle method);
 
   // Lookup native functions in base library.
   static address base_library_lookup(const char* class_name, const char* method_name, const char* signature);
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -659,6 +659,12 @@
   develop(bool, SpecialArraysEquals, true,                                  \
           "special version of Arrays.equals(char[],char[])")                \
                                                                             \
+  product(bool, CriticalJNINatives, true,                                   \
+          "check for critical JNI entry points")                            \
+                                                                            \
+  notproduct(bool, StressCriticalJNINatives, false,                         \
+            "Exercise register saving code in critical natives")            \
+                                                                            \
   product(bool, UseSSE42Intrinsics, false,                                  \
           "SSE4.2 versions of intrinsics")                                  \
                                                                             \
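
Usage note: CriticalJNINatives is a product flag that defaults to true, so the
lookup of JavaCritical_ entry points can be disabled with -XX:-CriticalJNINatives.
StressCriticalJNINatives is a notproduct flag, so it is only settable in
non-product (debug) builds, where it exercises the argument save/restore path in
the generated wrapper.
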
--- a/hotspot/src/share/vm/runtime/safepoint.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/runtime/safepoint.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -136,7 +136,6 @@
 
   RuntimeService::record_safepoint_begin();
 
-  {
   MutexLocker mu(Safepoint_lock);
 
   // Reset the count of active JNI critical threads
@@ -399,7 +398,6 @@
     // Record how much time spend on the above cleanup tasks
     update_statistics_on_cleanup_end(os::javaTimeNanos());
   }
-  }
 }
 
 // Wake up all threads, so they are ready to resume execution after the safepoint
@@ -546,6 +544,42 @@
 }
 
 
+// See if the thread is running inside a lazy critical native and
+// update the thread critical count if so.  Also set a suspend flag to
+// cause the native wrapper to return into the JVM to do the unlock
+// once the native finishes.
+void SafepointSynchronize::check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) {
+  if (state == _thread_in_native &&
+      thread->has_last_Java_frame() &&
+      thread->frame_anchor()->walkable()) {
+    // This thread might be in a critical native nmethod so look at
+    // the top of the stack and increment the critical count if it
+    // is.
+    frame wrapper_frame = thread->last_frame();
+    CodeBlob* stub_cb = wrapper_frame.cb();
+    if (stub_cb != NULL &&
+        stub_cb->is_nmethod() &&
+        stub_cb->as_nmethod_or_null()->is_lazy_critical_native()) {
+      // A thread could potentially be in a critical native across
+      // more than one safepoint, so only update the critical state on
+      // the first one.  When it returns it will perform the unlock.
+      if (!thread->do_critical_native_unlock()) {
+#ifdef ASSERT
+        if (!thread->in_critical()) {
+          GC_locker::increment_debug_jni_lock_count();
+        }
+#endif
+        thread->enter_critical();
+        // Make sure the native wrapper calls back on return to
+        // perform the needed critical unlock.
+        thread->set_critical_native_unlock();
+      }
+    }
+  }
+}
+
+
+
 // -------------------------------------------------------------------------------------------------------
 // Implementation of Safepoint callback point
 
@@ -874,6 +908,7 @@
   // agree and update the safepoint state here.
   if (SafepointSynchronize::safepoint_safe(_thread, state)) {
     roll_forward(_at_safepoint);
+    SafepointSynchronize::check_for_lazy_critical_native(_thread, state);
     if (_thread->in_critical()) {
       // Notice that this thread is in a critical section
       SafepointSynchronize::increment_jni_active_count();
--- a/hotspot/src/share/vm/runtime/safepoint.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/runtime/safepoint.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -140,6 +140,8 @@
 
   static bool safepoint_safe(JavaThread *thread, JavaThreadState state);
 
+  static void check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state);
+
   // Query
   inline static bool is_at_safepoint()   { return _state == _synchronized;  }
   inline static bool is_synchronizing()  { return _state == _synchronizing;  }
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -2678,6 +2678,20 @@
   return nm;
 }
 
+JRT_ENTRY_NO_ASYNC(void, SharedRuntime::block_for_jni_critical(JavaThread* thread))
+  assert(thread == JavaThread::current(), "must be");
+  // The code is about to enter a JNI lazy critical native method and
+  // _needs_gc is true, so if this thread is already in a critical
+  // section then just return, otherwise this thread should block
+  // until needs_gc has been cleared.
+  if (thread->in_critical()) {
+    return;
+  }
+  // Lock and unlock a critical section to give the system a chance to block
+  GC_locker::lock_critical(thread);
+  GC_locker::unlock_critical(thread);
+JRT_END
+
 #ifdef HAVE_DTRACE_H
 // Create a dtrace nmethod for this method.  The wrapper converts the
 // java compiled calling convention to the native convention, makes a dummy call
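
Taken together with check_needs_gc_for_critical_native in the x86_64 wrapper above,
the critical-native prologue reduces to roughly the following C++ (a sketch only:
the real check is a single cmp8 against GC_locker::needs_gc_address() in generated
code, argument saving/restoring is elided, and the existing GC_locker::needs_gc()
accessor is assumed here for readability):

    if (GC_locker::needs_gc()) {
      // save any incoming oops/arguments and record an OopMap, then...
      SharedRuntime::block_for_jni_critical(thread);  // blocks until the pending GC has run
      // ...restore the arguments
    }
    // fall through into the JavaCritical_ entry point
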
--- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -462,6 +462,9 @@
                                           VMRegPair *regs,
                                           BasicType ret_type );
 
+  // Block before entering a JNI critical method
+  static void block_for_jni_critical(JavaThread* thread);
+
 #ifdef HAVE_DTRACE_H
   // Generate a dtrace wrapper for a given method.  The method takes arguments
   // in the Java compiled code convention, marshals them to the native
--- a/hotspot/src/share/vm/runtime/thread.cpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Wed Feb 01 16:57:08 2012 -0800
@@ -33,6 +33,7 @@
 #include "interpreter/linkResolver.hpp"
 #include "interpreter/oopMapCache.hpp"
 #include "jvmtifiles/jvmtiEnv.hpp"
+#include "memory/gcLocker.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/universe.inline.hpp"
 #include "oops/instanceKlass.hpp"
@@ -2278,6 +2279,26 @@
   }
 }
 
+// This is a variant of the normal
+// check_special_condition_for_native_trans with slightly different
+// semantics for use by critical native wrappers.  It does all the
+// normal checks but also performs the transition back into
+// thread_in_Java state.  This is required so that critical natives
+// can potentially block and perform a GC if they are the last thread
+// exiting the GC_locker.
+void JavaThread::check_special_condition_for_native_trans_and_transition(JavaThread *thread) {
+  check_special_condition_for_native_trans(thread);
+
+  // Finish the transition
+  thread->set_thread_state(_thread_in_Java);
+
+  if (thread->do_critical_native_unlock()) {
+    ThreadInVMfromJavaNoAsyncException tiv(thread);
+    GC_locker::unlock_critical(thread);
+    thread->clear_critical_native_unlock();
+  }
+}
+
 // We need to guarantee the Threads_lock here, since resumes are not
 // allowed during safepoint synchronization
 // Can only resume from an external suspension
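
The corresponding epilogue difference (see the safepoint-check block in the x86_64
wrapper above) is, in rough C++ terms, the following sketch; safepoint_or_suspend_pending
is a placeholder for the wrapper's poll, and the generated code manipulates the
thread-state fields directly rather than calling C++:

    if (safepoint_or_suspend_pending) {
      if (!is_critical_native) {
        JavaThread::check_special_condition_for_native_trans(thread);
      } else {
        // also finishes the transition to _thread_in_Java and performs the
        // deferred GC_locker::unlock_critical() if the VM thread requested it
        JavaThread::check_special_condition_for_native_trans_and_transition(thread);
        goto after_transition;
      }
    }
    thread->set_thread_state(_thread_in_Java);   // normal (non-critical) path only
    after_transition: ;
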
--- a/hotspot/src/share/vm/runtime/thread.hpp	Wed Feb 01 07:59:01 2012 -0800
+++ b/hotspot/src/share/vm/runtime/thread.hpp	Wed Feb 01 16:57:08 2012 -0800
@@ -182,7 +182,8 @@
     _ext_suspended          = 0x40000000U, // thread has self-suspended
     _deopt_suspend          = 0x10000000U, // thread needs to self suspend for deopt
 
-    _has_async_exception    = 0x00000001U  // there is a pending async exception
+    _has_async_exception    = 0x00000001U, // there is a pending async exception
+    _critical_native_unlock = 0x00000002U  // Must call back to unlock JNI critical lock
   };
 
   // various suspension related flags - atomically updated
@@ -350,6 +351,15 @@
     clear_suspend_flag(_has_async_exception);
   }
 
+  bool do_critical_native_unlock() const { return (_suspend_flags & _critical_native_unlock) != 0; }
+
+  void set_critical_native_unlock() {
+    set_suspend_flag(_critical_native_unlock);
+  }
+  void clear_critical_native_unlock() {
+    clear_suspend_flag(_critical_native_unlock);
+  }
+
   // Support for Unhandled Oop detection
 #ifdef CHECK_UNHANDLED_OOPS
  private:
@@ -1038,6 +1048,11 @@
   // Check for async exception in addition to safepoint and suspend request.
   static void check_special_condition_for_native_trans(JavaThread *thread);
 
+  // Same as check_special_condition_for_native_trans but finishes the
+  // transition into thread_in_Java mode so that it can potentially
+  // block.
+  static void check_special_condition_for_native_trans_and_transition(JavaThread *thread);
+
   bool is_ext_suspend_completed(bool called_by_wait, int delay, uint32_t *bits);
   bool is_ext_suspend_completed_with_lock(uint32_t *bits) {
     MutexLockerEx ml(SR_lock(), Mutex::_no_safepoint_check_flag);
@@ -1311,8 +1326,9 @@
   // JNI critical regions. These can nest.
   bool in_critical()    { return _jni_active_critical > 0; }
   bool in_last_critical()  { return _jni_active_critical == 1; }
-  void enter_critical() { assert(Thread::current() == this,
-                                 "this must be current thread");
+  void enter_critical() { assert(Thread::current() == this ||
+                                 Thread::current()->is_VM_thread() && SafepointSynchronize::is_synchronizing(),
+                                 "this must be current thread or synchronizing");
                           _jni_active_critical++; }
   void exit_critical()  { assert(Thread::current() == this,
                                  "this must be current thread");