hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp
changeset 363 99d43e8a76ad
parent 360 21d113ecbf6a
child 602 92e03692ddd6
child 591 04d2e26e6d69
child 1374 4c24294029a9
--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Apr 16 17:36:29 2008 -0400
+++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu Apr 17 22:18:15 2008 -0400
@@ -1637,7 +1637,7 @@
     }
   } else if (dst.is_single_phys_reg()) {
     if (src.is_adjacent_aligned_on_stack(2)) {
-      __ ldd(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+      __ ld_long(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
     } else {
       // dst is a single reg.
       // Remember lo is low address not msb for stack slots
@@ -2501,6 +2501,551 @@
 
 }
 
+#ifdef HAVE_DTRACE_H
+// ---------------------------------------------------------------------------
+// Generate a dtrace nmethod for a given signature.  The method takes
+// arguments in the Java compiled code convention, marshals them to the
+// native ABI and then leaves nops at the position where you would expect to
+// call a native function. When the probe is enabled, dtrace replaces the
+// nops with a trap instruction and the trap causes a notification to dtrace.
+//
+// The probes can only take primitive types and java/lang/String as
+// arguments.  No other java types are allowed. Strings are converted to utf8
+// strings so that from dtrace's point of view java strings appear as C
+// strings. There is an arbitrary fixed limit on the total space that a
+// method can use for converting its strings (256 chars per string in the
+// signature), so any java string larger than this is truncated.
+
+static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
+static bool offsets_initialized = false;
+
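+// Build a VMRegPair describing a 64-bit value held in register r: a single
+// register on 64-bit, or the adjacent (r, r->successor()) pair on 32-bit.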
+static VMRegPair reg64_to_VMRegPair(Register r) {
+  VMRegPair ret;
+  if (wordSize == 8) {
+    ret.set2(r->as_VMReg());
+  } else {
+    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
+  }
+  return ret;
+}
+
+
+nmethod *SharedRuntime::generate_dtrace_nmethod(
+    MacroAssembler *masm, methodHandle method) {
+
+
+  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
+  // be single threaded in this method.
+  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
+
+  // Fill in the signature array, for the calling-convention call.
+  int total_args_passed = method->size_of_parameters();
+
+  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
+  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
+
+  // This is the signature we are going to use for the trap that dtrace
+  // will see: java/lang/String is converted, "this" is dropped, and any
+  // other object is converted to NULL.  (A one-slot java/lang/Long object
+  // reference is converted to a two-slot long, which is why we double the
+  // allocation.)
+  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
+
+  int i=0;
+  int total_strings = 0;
+  int first_arg_to_pass = 0;
+  int total_c_args = 0;
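+  // Offset of the value field; the boxing classes share a common layout,
+  // so one offset works for all of them.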
+  int box_offset = java_lang_boxing_object::value_offset_in_bytes();
+
+  // Skip the receiver as dtrace doesn't want to see it
+  if( !method->is_static() ) {
+    in_sig_bt[i++] = T_OBJECT;
+    first_arg_to_pass = 1;
+  }
+
+  SignatureStream ss(method->signature());
+  for ( ; !ss.at_return_type(); ss.next()) {
+    BasicType bt = ss.type();
+    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
+    out_sig_bt[total_c_args++] = bt;
+    if( bt == T_OBJECT) {
+      symbolOop s = ss.as_symbol_or_null();
+      if (s == vmSymbols::java_lang_String()) {
+        total_strings++;
+        out_sig_bt[total_c_args-1] = T_ADDRESS;
+      } else if (s == vmSymbols::java_lang_Boolean() ||
+                 s == vmSymbols::java_lang_Byte()) {
+        out_sig_bt[total_c_args-1] = T_BYTE;
+      } else if (s == vmSymbols::java_lang_Character() ||
+                 s == vmSymbols::java_lang_Short()) {
+        out_sig_bt[total_c_args-1] = T_SHORT;
+      } else if (s == vmSymbols::java_lang_Integer() ||
+                 s == vmSymbols::java_lang_Float()) {
+        out_sig_bt[total_c_args-1] = T_INT;
+      } else if (s == vmSymbols::java_lang_Long() ||
+                 s == vmSymbols::java_lang_Double()) {
+        out_sig_bt[total_c_args-1] = T_LONG;
+        out_sig_bt[total_c_args++] = T_VOID;
+      }
+    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
+      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
+      // We convert double to long
+      out_sig_bt[total_c_args-1] = T_LONG;
+      out_sig_bt[total_c_args++] = T_VOID;
+    } else if ( bt == T_FLOAT) {
+      // We convert float to int
+      out_sig_bt[total_c_args-1] = T_INT;
+    }
+  }
+
+  assert(i==total_args_passed, "validly parsed signature");
+
+  // Now get the compiled-Java layout as input arguments
+  int comp_args_on_stack;
+  comp_args_on_stack = SharedRuntime::java_calling_convention(
+      in_sig_bt, in_regs, total_args_passed, false);
+
+  // We have received a description of where all the java args are located
+  // on entry to the wrapper. We need to convert these args to where
+  // a native (non-jni) function would expect them. To figure out
+  // where they go we convert the java signature to a C signature and remove
+  // T_VOID for any long/double we might have received.
+
+
+  // Now figure out where the args must be stored and how much stack space
+  // they require (neglecting out_preserve_stack_slots but including space
+  // for storing the first six register arguments). It's weird; see
+  // int_stk_helper.
+  //
+  int out_arg_slots;
+  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
+
+  // Calculate the total number of stack slots we will need.
+
+  // First count the abi requirement plus all of the outgoing args
+  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
+
+  // Plus a temp for possible conversion of float/double/long register args
+
+  int conversion_temp = stack_slots;
+  stack_slots += 2;
+
+
+  // Now space for the string(s) we must convert
+
+  int string_locs = stack_slots;
+  stack_slots += total_strings *
+                   (max_dtrace_string_size / VMRegImpl::stack_slot_size);
+
+  // OK, the space we have allocated will look like:
+  //
+  //
+  // FP-> |                     |
+  //      |---------------------|
+  //      | string[n]           |
+  //      |---------------------| <- string_locs[n]
+  //      | string[n-1]         |
+  //      |---------------------| <- string_locs[n-1]
+  //      | ...                 |
+  //      | ...                 |
+  //      |---------------------| <- string_locs[1]
+  //      | string[0]           |
+  //      |---------------------| <- string_locs[0]
+  //      | temp                |
+  //      |---------------------| <- conversion_temp
+  //      | outbound memory     |
+  //      | based arguments     |
+  //      |                     |
+  //      |---------------------|
+  //      |                     |
+  // SP-> | out_preserved_slots |
+  //
+  //
+
+  // Now compute the actual number of stack words we need, rounding up to a
+  // multiple of four words to keep the stack properly aligned.
+  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
+
+  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
+
+  intptr_t start = (intptr_t)__ pc();
+
+  // First thing make an ic check to see if we should even be here
+
+  {
+    Label L;
+    const Register temp_reg = G3_scratch;
+    Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
+    __ verify_oop(O0);
+    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
+    __ cmp(temp_reg, G5_inline_cache_reg);
+    __ brx(Assembler::equal, true, Assembler::pt, L);
+    __ delayed()->nop();
+
+    __ jump_to(ic_miss, 0);
+    __ delayed()->nop();
+    __ align(CodeEntryAlignment);
+    __ bind(L);
+  }
+
+  int vep_offset = ((intptr_t)__ pc()) - start;
+
+
+  // The instruction at the verified entry point must be 5 bytes or longer
+  // because it can be patched on the fly by make_non_entrant. The stack bang
+  // instruction fits that requirement.
+
+  // Generate stack overflow check before creating frame
+  __ generate_stack_overflow_check(stack_size);
+
+  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
+         "valid size for make_non_entrant");
+
+  // Generate a new frame for the wrapper.
+  __ save(SP, -stack_size, SP);
+
+  // The frame is now complete as far as size and linkage are concerned.
+
+  int frame_complete = ((intptr_t)__ pc()) - start;
+
+#ifdef ASSERT
+  bool reg_destroyed[RegisterImpl::number_of_registers];
+  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+    reg_destroyed[r] = false;
+  }
+  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+    freg_destroyed[f] = false;
+  }
+
+#endif /* ASSERT */
+
+  VMRegPair zero;
+  zero.set2(G0->as_VMReg());
+
+  int c_arg, j_arg;
+
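+  // If a conversion-area stack offset is too big for a simm13 immediate we
+  // materialize it once in L6 and reuse it for every later conversion.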
+  Register conversion_off = noreg;
+
+  for (j_arg = first_arg_to_pass, c_arg = 0 ;
+       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
+
+    VMRegPair src = in_regs[j_arg];
+    VMRegPair dst = out_regs[c_arg];
+
+#ifdef ASSERT
+    if (src.first()->is_Register()) {
+      assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
+    } else if (src.first()->is_FloatRegister()) {
+      assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
+                                               FloatRegisterImpl::S)], "ack!");
+    }
+    if (dst.first()->is_Register()) {
+      reg_destroyed[dst.first()->as_Register()->encoding()] = true;
+    } else if (dst.first()->is_FloatRegister()) {
+      freg_destroyed[dst.first()->as_FloatRegister()->encoding(
+                                                 FloatRegisterImpl::S)] = true;
+    }
+#endif /* ASSERT */
+
+    switch (in_sig_bt[j_arg]) {
+      case T_ARRAY:
+      case T_OBJECT:
+        {
+          if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
+              out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
+            // need to unbox a one-slot value
+            Register in_reg = L0;
+            Register tmp = L2;
+            if ( src.first()->is_reg() ) {
+              in_reg = src.first()->as_Register();
+            } else {
+              assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
+                     "must be");
+              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
+            }
+            // If the final destination is an acceptable register
+            if ( dst.first()->is_reg() ) {
+              if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
+                tmp = dst.first()->as_Register();
+              }
+            }
+
+            Label skipUnbox;
+            if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
+              __ mov(G0, tmp->successor());
+            }
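+            // A NULL box unboxes to zero; the annulled delay slot clears
+            // tmp only when the branch is taken (i.e. when in_reg is NULL).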
+            __ br_null(in_reg, true, Assembler::pn, skipUnbox);
+            __ delayed()->mov(G0, tmp);
+
+            switch (out_sig_bt[c_arg]) {
+                case T_BYTE:
+                  __ ldub(in_reg, box_offset, tmp); break;
+                case T_SHORT:
+                  __ lduh(in_reg, box_offset, tmp); break;
+                case T_INT:
+                  __ ld(in_reg, box_offset, tmp); break;
+                case T_LONG:
+                  __ ld_long(in_reg, box_offset, tmp); break;
+                default: ShouldNotReachHere();
+            }
+
+            __ bind(skipUnbox);
+            // If tmp wasn't the final destination, copy to the final
+            // destination now.
+            if (tmp == L2) {
+              VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
+              if (out_sig_bt[c_arg] == T_LONG) {
+                long_move(masm, tmp_as_VM, dst);
+              } else {
+                move32_64(masm, tmp_as_VM, out_regs[c_arg]);
+              }
+            }
+            if (out_sig_bt[c_arg] == T_LONG) {
+              assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
+              ++c_arg; // move over the T_VOID to keep the loop indices in sync
+            }
+          } else if (out_sig_bt[c_arg] == T_ADDRESS) {
+            Register s =
+                src.first()->is_reg() ? src.first()->as_Register() : L2;
+            Register d =
+                dst.first()->is_reg() ? dst.first()->as_Register() : L2;
+
+            // We store the oop now so that the conversion pass can reach it
+            // while in the inner frame. This will be the only store if
+            // the oop is NULL.
+            if (s != L2) {
+              // src is register
+              if (d != L2) {
+                // dst is register
+                __ mov(s, d);
+              } else {
+                assert(Assembler::is_simm13(reg2offset(dst.first()) +
+                          STACK_BIAS), "must be");
+                __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
+              }
+            } else {
+                // src not a register
+                assert(Assembler::is_simm13(reg2offset(src.first()) +
+                           STACK_BIAS), "must be");
+                __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
+                if (d == L2) {
+                  assert(Assembler::is_simm13(reg2offset(dst.first()) +
+                             STACK_BIAS), "must be");
+                  __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
+                }
+            }
+          } else if (out_sig_bt[c_arg] != T_VOID) {
+            // Convert the arg to NULL
+            if (dst.first()->is_reg()) {
+              __ mov(G0, dst.first()->as_Register());
+            } else {
+              assert(Assembler::is_simm13(reg2offset(dst.first()) +
+                         STACK_BIAS), "must be");
+              __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
+            }
+          }
+        }
+        break;
+      case T_VOID:
+        break;
+
+      case T_FLOAT:
+        if (src.first()->is_stack()) {
+          // Stack to stack/reg is simple
+          move32_64(masm, src, dst);
+        } else {
+          if (dst.first()->is_reg()) {
+            // freg -> reg
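+            // There is no direct fpr->gpr move, so bounce the value through
+            // the conversion temp slot on the stack.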
+            int off =
+              STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
+            Register d = dst.first()->as_Register();
+            if (Assembler::is_simm13(off)) {
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, off);
+              __ ld(SP, off, d);
+            } else {
+              if (conversion_off == noreg) {
+                __ set(off, L6);
+                conversion_off = L6;
+              }
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, conversion_off);
+              __ ld(SP, conversion_off , d);
+            }
+          } else {
+            // freg -> mem
+            int off = STACK_BIAS + reg2offset(dst.first());
+            if (Assembler::is_simm13(off)) {
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, off);
+            } else {
+              if (conversion_off == noreg) {
+                __ set(off, L6);
+                conversion_off = L6;
+              }
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, conversion_off);
+            }
+          }
+        }
+        break;
+
+      case T_DOUBLE:
+        assert( j_arg + 1 < total_args_passed &&
+                in_sig_bt[j_arg + 1] == T_VOID &&
+                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
+        if (src.first()->is_stack()) {
+          // Stack to stack/reg is simple
+          long_move(masm, src, dst);
+        } else {
+          Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
+
+          // The destination could be an odd reg on 32bit, in which case
+          // we can't load directly to the destination.
+
+          if (!d->is_even() && wordSize == 4) {
+            d = L2;
+          }
+          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
+          if (Assembler::is_simm13(off)) {
+            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
+                   SP, off);
+            __ ld_long(SP, off, d);
+          } else {
+            if (conversion_off == noreg) {
+              __ set(off, L6);
+              conversion_off = L6;
+            }
+            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
+                   SP, conversion_off);
+            __ ld_long(SP, conversion_off, d);
+          }
+          if (d == L2) {
+            long_move(masm, reg64_to_VMRegPair(L2), dst);
+          }
+        }
+        break;
+
+      case T_LONG :
+        // 32bit can't do a split move of something like g1 -> O0, O1
+        // so use a memory temp
+        if (src.is_single_phys_reg() && wordSize == 4) {
+          Register tmp = L2;
+          if (dst.first()->is_reg() &&
+              (wordSize == 8 || dst.first()->as_Register()->is_even())) {
+            tmp = dst.first()->as_Register();
+          }
+
+          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
+          if (Assembler::is_simm13(off)) {
+            __ stx(src.first()->as_Register(), SP, off);
+            __ ld_long(SP, off, tmp);
+          } else {
+            if (conversion_off == noreg) {
+              __ set(off, L6);
+              conversion_off = L6;
+            }
+            __ stx(src.first()->as_Register(), SP, conversion_off);
+            __ ld_long(SP, conversion_off, tmp);
+          }
+
+          if (tmp == L2) {
+            long_move(masm, reg64_to_VMRegPair(L2), dst);
+          }
+        } else {
+          long_move(masm, src, dst);
+        }
+        break;
+
+      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
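+        // (falls through to the default move; the assert is compiled out
+        // in product builds)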
+
+      default:
+        move32_64(masm, src, dst);
+    }
+  }
+
+
+  // If we have any strings we must store any register-based args to the
+  // stack.  This includes any still-live floating point registers too.
+
+  if (total_strings > 0 ) {
+
+    // protect all the arg registers
+    __ save_frame(0);
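+    // Inside this nested frame the caller's O registers become our I
+    // registers, so the register args survive the conversion calls;
+    // after_save() below renames an outer register to its inner equivalent.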
+    __ mov(G2_thread, L7_thread_cache);
+    const Register L2_string_off = L2;
+
+    // Get first string offset
+    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
+
+    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
+      if (out_sig_bt[c_arg] == T_ADDRESS) {
+
+        VMRegPair dst = out_regs[c_arg];
+        const Register d = dst.first()->is_reg() ?
+            dst.first()->as_Register()->after_save() : noreg;
+
+        // It's a string; the oop was already copied to the out arg
+        // position.
+        if (d != noreg) {
+          __ mov(d, O0);
+        } else {
+          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
+                 "must be");
+          __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
+        }
+        Label skip;
+
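+        // Skip the conversion for a NULL string; the delay slot (not
+        // annulled) always computes the destination buffer address in O1.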
+        __ br_null(O0, false, Assembler::pn, skip);
+        __ delayed()->add(FP, L2_string_off, O1);
+
+        if (d != noreg) {
+          __ mov(O1, d);
+        } else {
+          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
+                 "must be");
+          __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
+        }
+
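+        // Convert the string to utf8 into the buffer at O1; the delay slot
+        // advances the buffer offset for the next string.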
+        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
+                relocInfo::runtime_call_type);
+        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
+
+        __ bind(skip);
+
+      }
+
+    }
+    __ mov(L7_thread_cache, G2_thread);
+    __ restore();
+
+  }
+
+
+  // OK, now we are done. We need to place the nop that dtrace wants in
+  // order to patch in the trap.
+
+  int patch_offset = ((intptr_t)__ pc()) - start;
+
+  __ nop();
+
+
+  // Return
+
+  __ ret();
+  __ delayed()->restore();
+
+  __ flush();
+
+  nmethod *nm = nmethod::new_dtrace_nmethod(
+      method, masm->code(), vep_offset, patch_offset, frame_complete,
+      stack_slots / VMRegImpl::slots_per_word);
+  return nm;
+
+}
+
+#endif // HAVE_DTRACE_H
+
 // this function returns the adjust size (in number of words) to a c2i adapter
 // activation for use during deoptimization
 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {