Merge
authortrims
Thu, 12 Mar 2009 18:16:36 -0700
changeset 2154 72a9b7284ccf
parent 2106 ec595a5e793e (current diff)
parent 2153 3e4f16dbc49a (diff)
child 2155 c5c3e5f4accc
Merge
hotspot/make/linux/makefiles/adlc.make
hotspot/make/solaris/makefiles/adlc.make
hotspot/src/cpu/sparc/vm/sparc.ad
hotspot/src/cpu/x86/vm/assembler_x86.cpp
hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp
hotspot/src/cpu/x86/vm/interp_masm_x86_32.cpp
hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp
hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp
hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp
hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp
hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp
hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp
hotspot/src/cpu/x86/vm/x86_32.ad
hotspot/src/cpu/x86/vm/x86_64.ad
hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
hotspot/src/share/vm/adlc/adlparse.cpp
hotspot/src/share/vm/adlc/archDesc.cpp
hotspot/src/share/vm/adlc/dfa.cpp
hotspot/src/share/vm/adlc/dict2.cpp
hotspot/src/share/vm/adlc/filebuff.hpp
hotspot/src/share/vm/adlc/forms.cpp
hotspot/src/share/vm/adlc/formssel.cpp
hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp
hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp
hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp
hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
hotspot/src/share/vm/gc_interface/collectedHeap.cpp
hotspot/src/share/vm/gc_interface/collectedHeap.hpp
hotspot/src/share/vm/includeDB_core
hotspot/src/share/vm/includeDB_gc
hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp
hotspot/src/share/vm/libadt/dict.cpp
hotspot/src/share/vm/memory/cardTableModRefBS.cpp
hotspot/src/share/vm/memory/cardTableModRefBS.hpp
hotspot/src/share/vm/memory/genCollectedHeap.cpp
hotspot/src/share/vm/memory/heapInspection.cpp
hotspot/src/share/vm/oops/klass.hpp
hotspot/src/share/vm/oops/methodOop.hpp
hotspot/src/share/vm/opto/block.cpp
hotspot/src/share/vm/opto/cfgnode.cpp
hotspot/src/share/vm/opto/chaitin.cpp
hotspot/src/share/vm/opto/chaitin.hpp
hotspot/src/share/vm/opto/classes.hpp
hotspot/src/share/vm/opto/compile.cpp
hotspot/src/share/vm/opto/gcm.cpp
hotspot/src/share/vm/opto/graphKit.cpp
hotspot/src/share/vm/opto/ifg.cpp
hotspot/src/share/vm/opto/live.cpp
hotspot/src/share/vm/opto/loopnode.cpp
hotspot/src/share/vm/opto/macro.cpp
hotspot/src/share/vm/opto/matcher.cpp
hotspot/src/share/vm/opto/memnode.cpp
hotspot/src/share/vm/opto/memnode.hpp
hotspot/src/share/vm/opto/reg_split.cpp
hotspot/src/share/vm/opto/superword.cpp
hotspot/src/share/vm/opto/type.cpp
hotspot/src/share/vm/opto/type.hpp
hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/os.cpp
hotspot/src/share/vm/runtime/safepoint.cpp
hotspot/src/share/vm/runtime/sharedRuntime.cpp
hotspot/src/share/vm/services/heapDumper.cpp
hotspot/src/share/vm/utilities/vmError.cpp
hotspot/src/share/vm/utilities/vmError.hpp
--- a/hotspot/make/linux/makefiles/adlc.make	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/make/linux/makefiles/adlc.make	Thu Mar 12 18:16:36 2009 -0700
@@ -61,8 +61,8 @@
 CPPFLAGS += -DASSERT
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
-# Suppress warnings (for now)
-CFLAGS_WARN = -w
+# Compiler warnings are treated as errors
+CFLAGS_WARN = -Werror
 CFLAGS += $(CFLAGS_WARN)
 
 OBJECTNAMES = \
--- a/hotspot/make/solaris/makefiles/adlc.make	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/make/solaris/makefiles/adlc.make	Thu Mar 12 18:16:36 2009 -0700
@@ -67,6 +67,8 @@
 endif
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
+# Compiler warnings are treated as errors
+CFLAGS_WARN = +w -errwarn
 CFLAGS += $(CFLAGS_WARN)
 
 ifeq ("${Platform_compiler}", "sparcWorks")
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -2615,6 +2615,158 @@
   }
 }
 
+RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr,
+                                               Register tmp,
+                                               int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  Address a(tmp, (address) delayed_value_addr);
+  load_ptr_contents(a, tmp);
+
+#ifdef ASSERT
+  tst(tmp);
+  breakpoint_trap(zero, xcc);
+#endif
+
+  if (offset != 0)
+    add(tmp, offset, tmp);
+
+  return RegisterConstant(tmp);
+}
+
+
+void MacroAssembler::regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) {
+  assert(dest.register_or_noreg() != G0, "lost side effect");
+  if ((src.is_constant() && src.as_constant() == 0) ||
+      (src.is_register() && src.as_register() == G0)) {
+    // do nothing
+  } else if (dest.is_register()) {
+    add(dest.as_register(), ensure_rs2(src, temp), dest.as_register());
+  } else if (src.is_constant()) {
+    intptr_t res = dest.as_constant() + src.as_constant();
+    dest = RegisterConstant(res); // side effect seen by caller
+  } else {
+    assert(temp != noreg, "cannot handle constant += register");
+    add(src.as_register(), ensure_rs2(dest, temp), temp);
+    dest = RegisterConstant(temp); // side effect seen by caller
+  }
+}
+
+void MacroAssembler::regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) {
+  assert(dest.register_or_noreg() != G0, "lost side effect");
+  if (!is_simm13(src.constant_or_zero()))
+    src = (src.as_constant() & 0xFF);
+  if ((src.is_constant() && src.as_constant() == 0) ||
+      (src.is_register() && src.as_register() == G0)) {
+    // do nothing
+  } else if (dest.is_register()) {
+    sll_ptr(dest.as_register(), src, dest.as_register());
+  } else if (src.is_constant()) {
+    intptr_t res = dest.as_constant() << src.as_constant();
+    dest = RegisterConstant(res); // side effect seen by caller
+  } else {
+    assert(temp != noreg, "cannot handle constant <<= register");
+    set(dest.as_constant(), temp);
+    sll_ptr(temp, src, temp);
+    dest = RegisterConstant(temp); // side effect seen by caller
+  }
+}
+
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Register sethi_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+
+  lduw(recv_klass, instanceKlass::vtable_length_offset() * wordSize, scan_temp);
+  // %%% We should store the aligned, prescaled offset in the klassoop.
+  // Then the next several instructions would fold away.
+
+  int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0);
+  int itb_offset = vtable_base;
+  if (round_to_unit != 0) {
+    // hoist first instruction of round_to(scan_temp, BytesPerLong):
+    itb_offset += round_to_unit - wordSize;
+  }
+  int itb_scale = exact_log2(vtableEntry::size() * wordSize);
+  sll(scan_temp, itb_scale,  scan_temp);
+  add(scan_temp, itb_offset, scan_temp);
+  if (round_to_unit != 0) {
+    // Round up to align_object_offset boundary
+    // see code for instanceKlass::start_of_itable!
+    // Was: round_to(scan_temp, BytesPerLong);
+    // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp);
+    and3(scan_temp, -round_to_unit, scan_temp);
+  }
+  add(recv_klass, scan_temp, scan_temp);
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  RegisterConstant itable_offset = itable_index;
+  regcon_sll_ptr(itable_offset, exact_log2(itableMethodEntry::size() * wordSize));
+  regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes());
+  add(recv_klass, ensure_rs2(itable_offset, sethi_temp), recv_klass);
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    // %%%% Could load both offset and interface in one ldx, if they were
+    // in the opposite order.  This would save a load.
+    ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result);
+
+    // Check that this entry is non-null.  A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    bpr(Assembler::rc_z, false, Assembler::pn, method_result, L_no_such_interface);
+    delayed()->cmp(method_result, intf_klass);
+
+    if (peel) {
+      brx(Assembler::equal,    false, Assembler::pt, found_method);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, search);
+      // (invert the test to fall through to found_method...)
+    }
+    delayed()->add(scan_temp, scan_step, scan_temp);
+
+    if (!peel)  break;
+
+    bind(search);
+  }
+
+  bind(found_method);
+
+  // Got a hit.
+  int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
+  // scan_temp[-scan_step] points to the vtable offset we need
+  ito_offset -= scan_step;
+  lduw(scan_temp, ito_offset, scan_temp);
+  ld_ptr(recv_klass, scan_temp, method_result);
+}
+
+
 void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
                                           Register temp_reg,
                                           Label& done, Label* slow_case,
@@ -4057,6 +4209,24 @@
   card_table_write(bs->byte_map_base, tmp, store_addr);
 }
 
+// Loading values by size and signed-ness
+void MacroAssembler::load_sized_value(Register s1, RegisterConstant s2, Register d,
+                                      int size_in_bytes, bool is_signed) {
+  switch (size_in_bytes ^ (is_signed ? -1 : 0)) {
+  case ~8:  // fall through:
+  case  8:  ld_long( s1, s2, d ); break;
+  case ~4:  ldsw(    s1, s2, d ); break;
+  case  4:  lduw(    s1, s2, d ); break;
+  case ~2:  ldsh(    s1, s2, d ); break;
+  case  2:  lduh(    s1, s2, d ); break;
+  case ~1:  ldsb(    s1, s2, d ); break;
+  case  1:  ldub(    s1, s2, d ); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+
+
 void MacroAssembler::load_klass(Register src_oop, Register klass) {
   // The number of bytes in this code is used by
   // MachCallDynamicJavaNode::ret_addr_offset()
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -384,6 +384,12 @@
 
   inline bool is_simm13(int offset = 0);  // check disp+offset for overflow
 
+  Address plus_disp(int disp) const {     // bump disp by a small amount
+    Address a = (*this);
+    a._disp += disp;
+    return a;
+  }
+
   Address split_disp() const {            // deal with disp overflow
     Address a = (*this);
     int hi_disp = _disp & ~0x3ff;
@@ -1082,6 +1088,7 @@
   inline void add(    Register s1, Register s2, Register d );
   inline void add(    Register s1, int simm13a, Register d, relocInfo::relocType rtype = relocInfo::none);
   inline void add(    Register s1, int simm13a, Register d, RelocationHolder const& rspec);
+  inline void add(    Register s1, RegisterConstant s2, Register d, int offset = 0);
   inline void add(    const Address&  a,              Register d, int offset = 0);
 
   void addcc(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
@@ -1298,6 +1305,16 @@
   inline void ld(   const Address& a, Register d, int offset = 0 );
   inline void ldd(  const Address& a, Register d, int offset = 0 );
 
+  inline void ldub(  Register s1, RegisterConstant s2, Register d );
+  inline void ldsb(  Register s1, RegisterConstant s2, Register d );
+  inline void lduh(  Register s1, RegisterConstant s2, Register d );
+  inline void ldsh(  Register s1, RegisterConstant s2, Register d );
+  inline void lduw(  Register s1, RegisterConstant s2, Register d );
+  inline void ldsw(  Register s1, RegisterConstant s2, Register d );
+  inline void ldx(   Register s1, RegisterConstant s2, Register d );
+  inline void ld(    Register s1, RegisterConstant s2, Register d );
+  inline void ldd(   Register s1, RegisterConstant s2, Register d );
+
   // pp 177
 
   void ldsba(  Register s1, Register s2, int ia, Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
@@ -1518,6 +1535,13 @@
   inline void st(   Register d, const Address& a, int offset = 0 );
   inline void std(  Register d, const Address& a, int offset = 0 );
 
+  inline void stb(  Register d, Register s1, RegisterConstant s2 );
+  inline void sth(  Register d, Register s1, RegisterConstant s2 );
+  inline void stw(  Register d, Register s1, RegisterConstant s2 );
+  inline void stx(  Register d, Register s1, RegisterConstant s2 );
+  inline void std(  Register d, Register s1, RegisterConstant s2 );
+  inline void st(   Register d, Register s1, RegisterConstant s2 );
+
   // pp 177
 
   void stba(  Register d, Register s1, Register s2, int ia ) {             emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
@@ -1835,6 +1859,7 @@
   // Functions for isolating 64 bit shifts for LP64
   inline void sll_ptr( Register s1, Register s2, Register d );
   inline void sll_ptr( Register s1, int imm6a,   Register d );
+  inline void sll_ptr( Register s1, RegisterConstant s2, Register d );
   inline void srl_ptr( Register s1, Register s2, Register d );
   inline void srl_ptr( Register s1, int imm6a,   Register d );
 
@@ -1940,20 +1965,47 @@
   // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's
   inline void ld_ptr(   Register s1, Register s2, Register d );
   inline void ld_ptr(   Register s1, int simm13a, Register d);
+  inline void ld_ptr(   Register s1, RegisterConstant s2, Register d );
   inline void ld_ptr(  const Address& a, Register d, int offset = 0 );
   inline void st_ptr(  Register d, Register s1, Register s2 );
   inline void st_ptr(  Register d, Register s1, int simm13a);
+  inline void st_ptr(  Register d, Register s1, RegisterConstant s2 );
   inline void st_ptr(  Register d, const Address& a, int offset = 0 );
 
   // ld_long will perform ld for 32 bit VM's and ldx for 64 bit VM's
   // st_long will perform st for 32 bit VM's and stx for 64 bit VM's
   inline void ld_long( Register s1, Register s2, Register d );
   inline void ld_long( Register s1, int simm13a, Register d );
+  inline void ld_long( Register s1, RegisterConstant s2, Register d );
   inline void ld_long( const Address& a, Register d, int offset = 0 );
   inline void st_long( Register d, Register s1, Register s2 );
   inline void st_long( Register d, Register s1, int simm13a );
+  inline void st_long( Register d, Register s1, RegisterConstant s2 );
   inline void st_long( Register d, const Address& a, int offset = 0 );
 
+  // Loading values by size and signed-ness
+  void load_sized_value(Register s1, RegisterConstant s2, Register d,
+                        int size_in_bytes, bool is_signed);
+
+  // Helpers for address formation.
+  // They update the dest in place, whether it is a register or constant.
+  // They emit no code at all if src is a constant zero.
+  // If dest is a constant and src is a register, the temp argument
+  // is required, and becomes the result.
+  // If dest is a register and src is a non-simm13 constant,
+  // the temp argument is required, and is used to materialize the constant.
+  void regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src,
+                       Register temp = noreg );
+  void regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src,
+                       Register temp = noreg );
+  RegisterConstant ensure_rs2(RegisterConstant rs2, Register sethi_temp) {
+    guarantee(sethi_temp != noreg, "constant offset overflow");
+    if (is_simm13(rs2.constant_or_zero()))
+      return rs2;               // register or short constant
+    set(rs2.as_constant(), sethi_temp);
+    return sethi_temp;
+  }
+
   // --------------------------------------------------
 
  public:
@@ -2267,6 +2319,14 @@
   );
   void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
 
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterConstant itable_index,
+                               Register method_result,
+                               Register temp_reg, Register temp2_reg,
+                               Label& no_such_interface);
+
   // Stack overflow checking
 
   // Note: this clobbers G3_scratch
@@ -2281,6 +2341,8 @@
   // stack overflow + shadow pages.  Clobbers tsp and scratch registers.
   void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch);
 
+  virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset);
+
   void verify_tlab();
 
   Condition negate_condition(Condition cond);
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -143,6 +143,49 @@
 inline void Assembler::ld(  Register s1, int simm13a, Register d) { lduw( s1, simm13a, d); }
 #endif
 
+inline void Assembler::ldub(  Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldsb(s1, s2.as_register(), d);
+  else                   ldsb(s1, s2.as_constant(), d);
+}
+inline void Assembler::ldsb(  Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldsb(s1, s2.as_register(), d);
+  else                   ldsb(s1, s2.as_constant(), d);
+}
+inline void Assembler::lduh(  Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldsh(s1, s2.as_register(), d);
+  else                   ldsh(s1, s2.as_constant(), d);
+}
+inline void Assembler::ldsh(  Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldsh(s1, s2.as_register(), d);
+  else                   ldsh(s1, s2.as_constant(), d);
+}
+inline void Assembler::lduw(  Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldsw(s1, s2.as_register(), d);
+  else                   ldsw(s1, s2.as_constant(), d);
+}
+inline void Assembler::ldsw(  Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldsw(s1, s2.as_register(), d);
+  else                   ldsw(s1, s2.as_constant(), d);
+}
+inline void Assembler::ldx(   Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldx(s1, s2.as_register(), d);
+  else                   ldx(s1, s2.as_constant(), d);
+}
+inline void Assembler::ld(    Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ld(s1, s2.as_register(), d);
+  else                   ld(s1, s2.as_constant(), d);
+}
+inline void Assembler::ldd(   Register s1, RegisterConstant s2, Register d) {
+  if (s2.is_register())  ldd(s1, s2.as_register(), d);
+  else                   ldd(s1, s2.as_constant(), d);
+}
+
+// form effective addresses this way:
+inline void Assembler::add(   Register s1, RegisterConstant s2, Register d, int offset) {
+  if (s2.is_register())  add(s1, s2.as_register(), d);
+  else                 { add(s1, s2.as_constant() + offset, d); offset = 0; }
+  if (offset != 0)       add(d,  offset,                    d);
+}
 
 inline void Assembler::ld(   const Address& a, Register d, int offset ) { relocate(a.rspec(offset)); ld(   a.base(), a.disp() + offset, d ); }
 inline void Assembler::ldsb( const Address& a, Register d, int offset ) { relocate(a.rspec(offset)); ldsb( a.base(), a.disp() + offset, d ); }
@@ -200,6 +243,27 @@
 inline void Assembler::st(  Register d, Register s1, Register s2) { stw(d, s1, s2); }
 inline void Assembler::st(  Register d, Register s1, int simm13a) { stw(d, s1, simm13a); }
 
+inline void Assembler::stb(  Register d, Register s1, RegisterConstant s2) {
+  if (s2.is_register())  stb(d, s1, s2.as_register());
+  else                   stb(d, s1, s2.as_constant());
+}
+inline void Assembler::sth(  Register d, Register s1, RegisterConstant s2) {
+  if (s2.is_register())  sth(d, s1, s2.as_register());
+  else                   sth(d, s1, s2.as_constant());
+}
+inline void Assembler::stx(  Register d, Register s1, RegisterConstant s2) {
+  if (s2.is_register())  stx(d, s1, s2.as_register());
+  else                   stx(d, s1, s2.as_constant());
+}
+inline void Assembler::std( Register d, Register s1, RegisterConstant s2) {
+  if (s2.is_register())  std(d, s1, s2.as_register());
+  else                   std(d, s1, s2.as_constant());
+}
+inline void Assembler::st(  Register d, Register s1, RegisterConstant s2) {
+  if (s2.is_register())  st(d, s1, s2.as_register());
+  else                   st(d, s1, s2.as_constant());
+}
+
 inline void Assembler::stb( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); stb( d, a.base(), a.disp() + offset); }
 inline void Assembler::sth( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); sth( d, a.base(), a.disp() + offset); }
 inline void Assembler::stw( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); stw( d, a.base(), a.disp() + offset); }
@@ -244,6 +308,14 @@
 #endif
 }
 
+inline void MacroAssembler::ld_ptr( Register s1, RegisterConstant s2, Register d ) {
+#ifdef _LP64
+  Assembler::ldx( s1, s2, d);
+#else
+  Assembler::ld(  s1, s2, d);
+#endif
+}
+
 inline void MacroAssembler::ld_ptr( const Address& a, Register d, int offset ) {
 #ifdef _LP64
   Assembler::ldx(  a, d, offset );
@@ -268,6 +340,14 @@
 #endif
 }
 
+inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterConstant s2 ) {
+#ifdef _LP64
+  Assembler::stx( d, s1, s2);
+#else
+  Assembler::st( d, s1, s2);
+#endif
+}
+
 inline void MacroAssembler::st_ptr(  Register d, const Address& a, int offset) {
 #ifdef _LP64
   Assembler::stx(  d, a, offset);
@@ -293,6 +373,14 @@
 #endif
 }
 
+inline void MacroAssembler::ld_long( Register s1, RegisterConstant s2, Register d ) {
+#ifdef _LP64
+  Assembler::ldx(s1, s2, d);
+#else
+  Assembler::ldd(s1, s2, d);
+#endif
+}
+
 inline void MacroAssembler::ld_long( const Address& a, Register d, int offset ) {
 #ifdef _LP64
   Assembler::ldx(a, d, offset );
@@ -317,6 +405,14 @@
 #endif
 }
 
+inline void MacroAssembler::st_long( Register d, Register s1, RegisterConstant s2 ) {
+#ifdef _LP64
+  Assembler::stx(d, s1, s2);
+#else
+  Assembler::std(d, s1, s2);
+#endif
+}
+
 inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) {
 #ifdef _LP64
   Assembler::stx(d, a, offset);
@@ -359,6 +455,11 @@
 #endif
 }
 
+inline void MacroAssembler::sll_ptr( Register s1, RegisterConstant s2, Register d ) {
+  if (s2.is_register())  sll_ptr(s1, s2.as_register(), d);
+  else                   sll_ptr(s1, s2.as_constant(), d);
+}
+
 // Use the right branch for the platform
 
 inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
--- a/hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -2465,7 +2465,10 @@
 //   InterpreterRuntime::post_method_entry();
 // }
 // if (DTraceMethodProbes) {
-//   SharedRuntime::dtrace_method_entry(method, reciever);
+//   SharedRuntime::dtrace_method_entry(method, receiver);
+// }
+// if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+//   SharedRuntime::rc_trace_method_entry(method, receiver);
 // }
 
 void InterpreterMacroAssembler::notify_method_entry() {
@@ -2497,6 +2500,13 @@
       CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
       G2_thread, Lmethod);
   }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    call_VM_leaf(noreg,
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      G2_thread, Lmethod);
+  }
 }
 
 
--- a/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -243,7 +243,7 @@
 
   // Regenerate the instruction sequence that performs the 64 bit
   // sethi.  This only does the sethi.  The disp field (bottom 10 bits)
-  // must be handled seperately.
+  // must be handled separately.
   static void set_data64_sethi(address instaddr, intptr_t x);
 
   // combine the fields of a sethi/simm13 pair (simm13 = or, add, jmpl, ld/st)
--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -2161,6 +2161,18 @@
     __ restore();
   }
 
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    // create inner frame
+    __ save_frame(0);
+    __ mov(G2_thread, L7_thread_cache);
+    __ set_oop_constant(JNIHandles::make_local(method()), O1);
+    __ call_VM_leaf(L7_thread_cache,
+         CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+         G2_thread, O1);
+    __ restore();
+  }
+
   // We are in the jni frame unless saved_frame is true in which case
   // we are in one frame deeper (the "inner" frame). If we are in the
   // "inner" frames the args are in the Iregs and if the jni frame then
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Thu Mar 12 18:16:36 2009 -0700
@@ -189,7 +189,7 @@
 // double fp register numbers.  FloatRegisterImpl in register_sparc.hpp
 // wants 0-63, so we have to convert every time we want to use fp regs
 // with the macroassembler, using reg_to_DoubleFloatRegister_object().
-// 255 is a flag meaning 'dont go here'.
+// 255 is a flag meaning "don't go here".
 // I believe we can't handle callee-save doubles D32 and up until
 // the place in the sparc stack crawler that asserts on the 255 is
 // fixed up.
@@ -462,7 +462,7 @@
 
 // Macros to extract hi & lo halves from a long pair.
 // G0 is not part of any long pair, so assert on that.
-// Prevents accidently using G1 instead of G0.
+// Prevents accidentally using G1 instead of G0.
 #define LONG_HI_REG(x) (x)
 #define LONG_LO_REG(x) (x)
 
@@ -1431,7 +1431,7 @@
 
 #ifndef _LP64
   // In the LP64 build, all registers can be moved as aligned/adjacent
-  // pairs, so there's never any need to move the high bits seperately.
+  // pairs, so there's never any need to move the high bits separately.
   // The 32-bit builds have to deal with the 32-bit ABI which can force
   // all sorts of silly alignment problems.
 
@@ -1624,7 +1624,7 @@
   Register temp_reg   = G3;
   assert( G5_ic_reg != temp_reg, "conflicting registers" );
 
-  // Load klass from reciever
+  // Load klass from receiver
   __ load_klass(O0, temp_reg);
   // Compare against expected klass
   __ cmp(temp_reg, G5_ic_reg);
@@ -4149,7 +4149,7 @@
 
 //----------OPERAND CLASSES----------------------------------------------------
 // Operand Classes are groups of operands that are used to simplify
-// instruction definitions by not requiring the AD writer to specify seperate
+// instruction definitions by not requiring the AD writer to specify separate
 // instructions for every form of operand when the instruction accepts
 // multiple operand types with the same basic encoding and format.  The classic
 // case of this is memory operands.
@@ -5286,55 +5286,91 @@
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDSB   $mem,$dst" %}
+  format %{ "LDSB   $mem,$dst\t! byte" %}
+  opcode(Assembler::ldsb_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Byte (8bit signed) into a Long Register
+instruct loadB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadB mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDSB   $mem,$dst\t! byte -> long" %}
   opcode(Assembler::ldsb_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
 %}
 
-// Load Byte (8bit UNsigned) into an int reg
-instruct loadUB(iRegI dst, memory mem, immI_255 bytemask) %{
-  match(Set dst (AndI (LoadB mem) bytemask));
+// Load Unsigned Byte (8bit UNsigned) into an int reg
+instruct loadUB(iRegI dst, memory mem) %{
+  match(Set dst (LoadUB mem));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDUB   $mem,$dst" %}
+  format %{ "LDUB   $mem,$dst\t! ubyte" %}
+  opcode(Assembler::ldub_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into a Long Register
+instruct loadUB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadUB mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUB   $mem,$dst\t! ubyte -> long" %}
   opcode(Assembler::ldub_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
 %}
 
-// Load Byte (8bit UNsigned) into a Long Register
-instruct loadUBL(iRegL dst, memory mem, immL_FF bytemask) %{
-  match(Set dst (AndL (ConvI2L (LoadB mem)) bytemask));
+// Load Short (16bit signed)
+instruct loadS(iRegI dst, memory mem) %{
+  match(Set dst (LoadS mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDSH   $mem,$dst\t! short" %}
+  opcode(Assembler::ldsh_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Short (16bit signed) into a Long Register
+instruct loadS2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadS mem)));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDUB   $mem,$dst" %}
-  opcode(Assembler::ldub_op3);
+  format %{ "LDSH   $mem,$dst\t! short -> long" %}
+  opcode(Assembler::ldsh_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned)
+instruct loadUS(iRegI dst, memory mem) %{
+  match(Set dst (LoadUS mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUH   $mem,$dst\t! ushort/char" %}
+  opcode(Assembler::lduh_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
 %}
 
 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register
-instruct loadUS2L(iRegL dst, memory mem, immL_FFFF bytemask) %{
-  match(Set dst (AndL (ConvI2L (LoadUS mem)) bytemask));
+instruct loadUS2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadUS mem)));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDUH   $mem,$dst" %}
-  opcode(Assembler::lduh_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(iload_mask_mem);
-%}
-
-// Load Unsigned Short/Char (16bit unsigned)
-instruct loadUS(iRegI dst, memory mem) %{
-  match(Set dst (LoadUS mem));
-  ins_cost(MEMORY_REF_COST);
-
-  size(4);
-  format %{ "LDUH   $mem,$dst" %}
+  format %{ "LDUH   $mem,$dst\t! ushort/char -> long" %}
   opcode(Assembler::lduh_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
@@ -5344,9 +5380,33 @@
 instruct loadI(iRegI dst, memory mem) %{
   match(Set dst (LoadI mem));
   ins_cost(MEMORY_REF_COST);
-  size(4);
-
-  format %{ "LDUW   $mem,$dst" %}
+
+  size(4);
+  format %{ "LDUW   $mem,$dst\t! int" %}
+  opcode(Assembler::lduw_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer into a Long Register
+instruct loadI2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadI mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDSW   $mem,$dst\t! int -> long" %}
+  opcode(Assembler::ldsw_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Integer into a Long Register
+instruct loadUI2L(iRegL dst, memory mem) %{
+  match(Set dst (LoadUI2L mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUW   $mem,$dst\t! uint -> long" %}
   opcode(Assembler::lduw_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mem);
@@ -5356,6 +5416,7 @@
 instruct loadL(iRegL dst, memory mem ) %{
   match(Set dst (LoadL mem));
   ins_cost(MEMORY_REF_COST);
+
   size(4);
   format %{ "LDX    $mem,$dst\t! long" %}
   opcode(Assembler::ldx_op3);
@@ -5471,13 +5532,11 @@
 
    format %{ "LDUW   $mem,$dst\t! compressed ptr" %}
    ins_encode %{
-     Register base = as_Register($mem$$base);
-     Register index = as_Register($mem$$index);
-     Register dst = $dst$$Register;
+     Register index = $mem$$index$$Register;
      if (index != G0) {
-       __ lduw(base, index, dst);
+       __ lduw($mem$$base$$Register, index, $dst$$Register);
      } else {
-       __ lduw(base, $mem$$disp, dst);
+       __ lduw($mem$$base$$Register, $mem$$disp, $dst$$Register);
      }
    %}
    ins_pipe(iload_mem);
@@ -5521,18 +5580,6 @@
   ins_pipe(iload_mem);
 %}
 
-// Load Short (16bit signed)
-instruct loadS(iRegI dst, memory mem) %{
-  match(Set dst (LoadS mem));
-  ins_cost(MEMORY_REF_COST);
-
-  size(4);
-  format %{ "LDSH   $mem,$dst" %}
-  opcode(Assembler::ldsh_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(iload_mask_mem);
-%}
-
 // Load Double
 instruct loadD(regD dst, memory mem) %{
   match(Set dst (LoadD mem));
@@ -6847,7 +6894,7 @@
   ins_pipe(sdiv_reg_reg);
 %}
 
-// Magic constant, reciprical of 10
+// Magic constant, reciprocal of 10
 instruct loadConI_x66666667(iRegIsafe dst) %{
   effect( DEF dst );
 
@@ -6857,7 +6904,7 @@
   ins_pipe(ialu_hi_lo_reg);
 %}
 
-// Register Shift Right Arithmatic Long by 32-63
+// Register Shift Right Arithmetic Long by 32-63
 instruct sra_31( iRegI dst, iRegI src ) %{
   effect( DEF dst, USE src );
   format %{ "SRA    $src,31,$dst\t! Used in div-by-10" %}
@@ -9048,7 +9095,7 @@
 // These must follow all instruction definitions as they use the names
 // defined in the instructions definitions.
 //
-// peepmatch ( root_instr_name [preceeding_instruction]* );
+// peepmatch ( root_instr_name [preceding_instruction]* );
 //
 // peepconstraint %{
 // (instruction_number.operand_name relational_op instruction_number.operand_name
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1545,7 +1545,7 @@
 
   // Handle all the JSR stuff here, then exit.
   // It's much shorter and cleaner than intermingling with the
-  // non-JSR normal-branch stuff occuring below.
+  // non-JSR normal-branch stuff occurring below.
   if( is_jsr ) {
     // compute return address as bci in Otos_i
     __ ld_ptr(Address(Lmethod, 0, in_bytes(methodOopDesc::const_offset())), G3_scratch);
@@ -3079,7 +3079,7 @@
     Label ok;
 
     // Check that entry is non-null.  Null entries are probably a bytecode
-    // problem.  If the interface isn't implemented by the reciever class,
+    // problem.  If the interface isn't implemented by the receiver class,
     // the VM should throw IncompatibleClassChangeError.  linkResolver checks
     // this too but that's only if the entry isn't already resolved, so we
     // need to check again.
--- a/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -106,6 +106,15 @@
   __ delayed()->nop();
 
   masm->flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  vtable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
   s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
@@ -113,9 +122,9 @@
 
 // NOTE:  %%%% if any change is made to this stub make sure that the function
 //             pd_code_size_limit is changed to ensure the correct size for VtableStub
-VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   const int sparc_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(sparc_code_length) VtableStub(false, vtable_index);
+  VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), sparc_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -139,7 +148,6 @@
   // are passed in the %o registers.  Instead, longs are passed in G1 and G4
   // and so those registers are not available here.
   __ save(SP,-frame::register_save_words*wordSize,SP);
-  Register I0_receiver = I0;    // Location of receiver after save
 
 #ifndef PRODUCT
   if (CountCompiledCalls) {
@@ -151,63 +159,31 @@
   }
 #endif /* PRODUCT */
 
-  // load start of itable entries into L0 register
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  __ ld(Address(G3_klassOop, 0, instanceKlass::vtable_length_offset() * wordSize), L0);
-
-  // %%% Could store the aligned, prescaled offset in the klassoop.
-  __ sll(L0, exact_log2(vtableEntry::size() * wordSize), L0);
-  // see code for instanceKlass::start_of_itable!
-  const int vtable_alignment = align_object_offset(1);
-  assert(vtable_alignment == 1 || vtable_alignment == 2, "");
-  const int odd_bit = vtableEntry::size() * wordSize;
-  if (vtable_alignment == 2) {
-    __ and3(L0, odd_bit, L1);   // isolate the odd bit
-  }
-  __ add(G3_klassOop, L0, L0);
-  if (vtable_alignment == 2) {
-    __ add(L0, L1, L0);         // double the odd bit, to align up
-  }
+  Label throw_icce;
 
-  // Loop over all itable entries until desired interfaceOop (G5_interface) found
-  __ bind(search);
-
-  // %%%% Could load both offset and interface in one ldx, if they were
-  // in the opposite order.  This would save a load.
-  __ ld_ptr(L0, base + itableOffsetEntry::interface_offset_in_bytes(), L1);
-
-  // If the entry is NULL then we've reached the end of the table
-  // without finding the expected interface, so throw an exception
-  Label throw_icce;
-  __ bpr(Assembler::rc_z, false, Assembler::pn, L1, throw_icce);
-  __ delayed()->cmp(G5_interface, L1);
-  __ brx(Assembler::notEqual, true, Assembler::pn, search);
-  __ delayed()->add(L0, itableOffsetEntry::size() * wordSize, L0);
-
-  // entry found and L0 points to it, move offset of vtable for interface into L0
-  __ ld(L0, base + itableOffsetEntry::offset_offset_in_bytes(), L0);
-
-  // Compute itableMethodEntry and get methodOop(G5_method) and entrypoint(L0) for compiler
-  const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + itableMethodEntry::method_offset_in_bytes();
-  __ add(G3_klassOop, L0, L1);
-  __ ld_ptr(L1, method_offset, G5_method);
+  Register L5_method = L5;
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             G3_klassOop, G5_interface, itable_index,
+                             // outputs: method, scan temp. reg
+                             L5_method, L2, L3,
+                             throw_icce);
 
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L01;
-    __ ld_ptr(L1, method_offset, G5_method);
-    __ bpr(Assembler::rc_nz, false, Assembler::pt, G5_method, L01);
+    __ bpr(Assembler::rc_nz, false, Assembler::pt, L5_method, L01);
     __ delayed()->nop();
     __ stop("methodOop is null");
     __ bind(L01);
-    __ verify_oop(G5_method);
+    __ verify_oop(L5_method);
   }
 #endif
 
   // If the following load is through a NULL pointer, we'll take an OS
   // exception that should translate into an AbstractMethodError.  We need the
   // window count to be correct at that time.
-  __ restore();                 // Restore registers BEFORE the AME point
+  __ restore(L5_method, 0, G5_method);
+  // Restore registers *before* the AME point.
 
   address ame_addr = __ pc();   // if the vtable entry is null, the method is abstract
   __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3_scratch);
@@ -225,6 +201,12 @@
 
   masm->flush();
 
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  itable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
 
   s->set_exception_points(npe_addr, ame_addr);
@@ -243,8 +225,7 @@
                         (UseCompressedOops ? 2*BytesPerInstWord : 0);
       return basic + slop;
     } else {
-      // save, ld, ld, sll, and, add, add, ld, cmp, br, add, ld, add, ld, ld, jmp, restore, sethi, jmpl, restore
-      const int basic = (20 LP64_ONLY(+ 6)) * BytesPerInstWord +
+      const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
                         // shift;add for load_klass
                         (UseCompressedOops ? 2*BytesPerInstWord : 0);
       return (basic + slop);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -129,13 +129,19 @@
 // Convert the raw encoding form into the form expected by the constructor for
 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 // that to noreg for the Address constructor.
-Address Address::make_raw(int base, int index, int scale, int disp) {
+Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
+  RelocationHolder rspec;
+  if (disp_is_oop) {
+    rspec = Relocation::spec_simple(relocInfo::oop_type);
+  }
   bool valid_index = index != rsp->encoding();
   if (valid_index) {
     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
+    madr._rspec = rspec;
     return madr;
   } else {
     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
+    madr._rspec = rspec;
     return madr;
   }
 }
@@ -3892,6 +3898,21 @@
   emit_operand(src, dst);
 }
 
+void Assembler::movsbq(Register dst, Address src) {
+  InstructionMark im(this);
+  prefixq(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xBE);
+  emit_operand(dst, src);
+}
+
+void Assembler::movsbq(Register dst, Register src) {
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xBE);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::movslq(Register dst, int32_t imm32) {
   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
@@ -3925,6 +3946,51 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::movswq(Register dst, Address src) {
+  InstructionMark im(this);
+  prefixq(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xBF);
+  emit_operand(dst, src);
+}
+
+void Assembler::movswq(Register dst, Register src) {
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xBF);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::movzbq(Register dst, Address src) {
+  InstructionMark im(this);
+  prefixq(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xB6);
+  emit_operand(dst, src);
+}
+
+void Assembler::movzbq(Register dst, Register src) {
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xB6);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::movzwq(Register dst, Address src) {
+  InstructionMark im(this);
+  prefixq(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xB7);
+  emit_operand(dst, src);
+}
+
+void Assembler::movzwq(Register dst, Register src) {
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xB7);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::negq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
   emit_byte(0xF7);
@@ -6197,8 +6263,11 @@
   return off;
 }
 
-// word => int32 which seems bad for 64bit
-int MacroAssembler::load_signed_word(Register dst, Address src) {
+// Note: load_signed_short used to be called load_signed_word.
+// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
+// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
+// The term "word" in HotSpot means a 32- or 64-bit machine word.
+int MacroAssembler::load_signed_short(Register dst, Address src) {
   int off;
   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
     // This is dubious to me since it seems safe to do a signed 16 => 64 bit
@@ -6207,7 +6276,7 @@
     off = offset();
     movswl(dst, src); // movsxw
   } else {
-    off = load_unsigned_word(dst, src);
+    off = load_unsigned_short(dst, src);
     shll(dst, 16);
     sarl(dst, 16);
   }
@@ -6229,7 +6298,8 @@
   return off;
 }
 
-int MacroAssembler::load_unsigned_word(Register dst, Address src) {
+// Note: load_unsigned_short used to be called load_unsigned_word.
+int MacroAssembler::load_unsigned_short(Register dst, Address src) {
   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
   // and "3.9 Partial Register Penalties", p. 22).
   int off;
@@ -6244,6 +6314,28 @@
   return off;
 }
 
+void MacroAssembler::load_sized_value(Register dst, Address src,
+                                      int size_in_bytes, bool is_signed) {
+  switch (size_in_bytes ^ (is_signed ? -1 : 0)) {
+#ifndef _LP64
+  // For case 8, caller is responsible for manually loading
+  // the second word into another register.
+  case ~8:  // fall through:
+  case  8:  movl(                dst, src ); break;
+#else
+  case ~8:  // fall through:
+  case  8:  movq(                dst, src ); break;
+#endif
+  case ~4:  // fall through:
+  case  4:  movl(                dst, src ); break;
+  case ~2:  load_signed_short(   dst, src ); break;
+  case  2:  load_unsigned_short( dst, src ); break;
+  case ~1:  load_signed_byte(    dst, src ); break;
+  case  1:  load_unsigned_byte(  dst, src ); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
   if (reachable(dst)) {
     movl(as_Address(dst), src);
@@ -6463,7 +6555,8 @@
   Address index(noreg, tmp, Address::times_1);
   ExternalAddress page(os::get_memory_serialize_page());
 
-  movptr(ArrayAddress(page, index), tmp);
+  // Size of store must match masking code above
+  movl(as_Address(ArrayAddress(page, index)), tmp);
 }
 
 // Calls to C land
@@ -7049,6 +7142,81 @@
 }
 
 
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+  Address::ScaleFactor times_vte_scale = Address::times_ptr;
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
+  if (HeapWordsPerLong > 1) {
+    // Round up to align_object_offset boundary
+    // see code for instanceKlass::start_of_itable!
+    round_to(scan_temp, BytesPerLong);
+  }
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+    cmpptr(intf_klass, method_result);
+
+    if (peel) {
+      jccb(Assembler::equal, found_method);
+    } else {
+      jccb(Assembler::notEqual, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel)  break;
+
+    bind(search);
+
+    // Check that the previous entry is non-null.  A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    testptr(method_result, method_result);
+    jcc(Assembler::zero, L_no_such_interface);
+    addptr(scan_temp, scan_step);
+  }
+
+  bind(found_method);
+
+  // Got a hit.
+  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
+  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
+}
+
+
 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
   ucomisd(dst, as_Address(src));
 }
@@ -7094,6 +7262,31 @@
 }
 
 
+RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr,
+                                               Register tmp,
+                                               int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  movptr(tmp, ExternalAddress((address) delayed_value_addr));
+
+#ifdef ASSERT
+  Label L;
+  testl(tmp, tmp);
+  jccb(Assembler::notZero, L);
+  hlt();
+  bind(L);
+#endif
+
+  if (offset != 0)
+    addptr(tmp, offset);
+
+  return RegisterConstant(tmp);
+}
+
+
 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
   if (!VerifyOops) return;
 
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -153,6 +153,21 @@
     times_8  =  3,
     times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
   };
+  static ScaleFactor times(int size) {
+    assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
+    if (size == 8)  return times_8;
+    if (size == 4)  return times_4;
+    if (size == 2)  return times_2;
+    return times_1;
+  }
+  static int scale_size(ScaleFactor scale) {
+    assert(scale != no_scale, "");
+    assert(((1 << (int)times_1) == 1 &&
+            (1 << (int)times_2) == 2 &&
+            (1 << (int)times_4) == 4 &&
+            (1 << (int)times_8) == 8), "");
+    return (1 << (int)scale);
+  }
 
  private:
   Register         _base;
@@ -197,6 +212,22 @@
            "inconsistent address");
   }
 
+  Address(Register base, RegisterConstant index, ScaleFactor scale = times_1, int disp = 0)
+    : _base (base),
+      _index(index.register_or_noreg()),
+      _scale(scale),
+      _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
+    if (!index.is_register())  scale = Address::no_scale;
+    assert(!_index->is_valid() == (scale == Address::no_scale),
+           "inconsistent address");
+  }
+
+  Address plus_disp(int disp) const {
+    Address a = (*this);
+    a._disp += disp;
+    return a;
+  }
+
   // The following two overloads are used in connection with the
   // ByteSize type (see sizes.hpp).  They simplify the use of
   // ByteSize'd arguments in assembly code. Note that their equivalent
@@ -224,6 +255,17 @@
     assert(!index->is_valid() == (scale == Address::no_scale),
            "inconsistent address");
   }
+
+  Address(Register base, RegisterConstant index, ScaleFactor scale, ByteSize disp)
+    : _base (base),
+      _index(index.register_or_noreg()),
+      _scale(scale),
+      _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
+    if (!index.is_register())  scale = Address::no_scale;
+    assert(!_index->is_valid() == (scale == Address::no_scale),
+           "inconsistent address");
+  }
+
 #endif // ASSERT
 
   // accessors
@@ -236,11 +278,10 @@
   // Convert the raw encoding form into the form expected by the constructor for
   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
   // that to noreg for the Address constructor.
-  static Address make_raw(int base, int index, int scale, int disp);
+  static Address make_raw(int base, int index, int scale, int disp, bool disp_is_oop);
 
   static Address make_array(ArrayAddress);
 
-
  private:
   bool base_needs_rex() const {
     return _base != noreg && _base->encoding() >= 8;
@@ -1097,6 +1138,9 @@
   void movsbl(Register dst, Register src);
 
 #ifdef _LP64
+  void movsbq(Register dst, Address src);
+  void movsbq(Register dst, Register src);
+
   // Move signed 32bit immediate to 64bit extending sign
   void movslq(Address dst, int32_t imm64);
   void movslq(Register dst, int32_t imm64);
@@ -1109,6 +1153,11 @@
   void movswl(Register dst, Address src);
   void movswl(Register dst, Register src);
 
+#ifdef _LP64
+  void movswq(Register dst, Address src);
+  void movswq(Register dst, Register src);
+#endif
+
   void movw(Address dst, int imm16);
   void movw(Register dst, Address src);
   void movw(Address dst, Register src);
@@ -1116,9 +1165,19 @@
   void movzbl(Register dst, Address src);
   void movzbl(Register dst, Register src);
 
+#ifdef _LP64
+  void movzbq(Register dst, Address src);
+  void movzbq(Register dst, Register src);
+#endif
+
   void movzwl(Register dst, Address src);
   void movzwl(Register dst, Register src);
 
+#ifdef _LP64
+  void movzwq(Register dst, Address src);
+  void movzwq(Register dst, Register src);
+#endif
+
   void mull(Address src);
   void mull(Register src);
 
@@ -1393,17 +1452,20 @@
 
   // The following 4 methods return the offset of the appropriate move instruction
 
-  // Support for fast byte/word loading with zero extension (depending on particular CPU)
+  // Support for fast byte/short loading with zero extension (depending on particular CPU)
   int load_unsigned_byte(Register dst, Address src);
-  int load_unsigned_word(Register dst, Address src);
-
-  // Support for fast byte/word loading with sign extension (depending on particular CPU)
+  int load_unsigned_short(Register dst, Address src);
+
+  // Support for fast byte/short loading with sign extension (depending on particular CPU)
   int load_signed_byte(Register dst, Address src);
-  int load_signed_word(Register dst, Address src);
+  int load_signed_short(Register dst, Address src);
 
   // Support for sign-extension (hi:lo = extend_sign(lo))
   void extend_sign(Register hi, Register lo);
 
+  // Loading values by size and signed-ness
+  void load_sized_value(Register dst, Address src, int size_in_bytes, bool is_signed);
+
   // Support for inc/dec with optimal instruction selection depending on value
 
   void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
@@ -1721,6 +1783,14 @@
   );
   void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
 
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterConstant itable_index,
+                               Register method_result,
+                               Register scan_temp,
+                               Label& no_such_interface);
+
   //----
   void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
 
@@ -1763,6 +1833,10 @@
   // stack overflow + shadow pages.  Also, clobbers tmp
   void bang_stack_size(Register size, Register tmp);
 
+  virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr,
+                                         Register tmp,
+                                         int offset);
+
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp);
 
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -554,8 +554,8 @@
   __ jcc (Assembler::zero, noLoop);
 
   // compare first characters
-  __ load_unsigned_word(rcx, Address(rdi, 0));
-  __ load_unsigned_word(rbx, Address(rsi, 0));
+  __ load_unsigned_short(rcx, Address(rdi, 0));
+  __ load_unsigned_short(rbx, Address(rsi, 0));
   __ subl(rcx, rbx);
   __ jcc(Assembler::notZero, haveResult);
   // starting loop
@@ -574,8 +574,8 @@
   Label loop;
   __ align(wordSize);
   __ bind(loop);
-  __ load_unsigned_word(rcx, Address(rdi, rax, Address::times_2, 0));
-  __ load_unsigned_word(rbx, Address(rsi, rax, Address::times_2, 0));
+  __ load_unsigned_short(rcx, Address(rdi, rax, Address::times_2, 0));
+  __ load_unsigned_short(rbx, Address(rsi, rax, Address::times_2, 0));
   __ subl(rcx, rbx);
   __ jcc(Assembler::notZero, haveResult);
   __ increment(rax);
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -501,7 +501,7 @@
     LIRItem right(x->y(), this);
 
     left.load_item();
-    // dont load constants to save register
+    // don't load constants to save register
     right.load_nonconstant();
     rlock_result(x);
     arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
--- a/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -513,7 +513,7 @@
     // compute full expression stack limit
 
     const Address size_of_stack    (rbx, methodOopDesc::max_stack_offset());
-    __ load_unsigned_word(rdx, size_of_stack);                            // get size of expression stack in words
+    __ load_unsigned_short(rdx, size_of_stack);                           // get size of expression stack in words
     __ negptr(rdx);                                                       // so we can subtract in next step
     // Allocate expression stack
     __ lea(rsp, Address(rsp, rdx, Address::times_ptr));
@@ -523,7 +523,7 @@
 #ifdef _LP64
   // Make sure stack is properly aligned and sized for the abi
   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
-  __ andptr(rsp, -16); // must be 16 byte boundry (see amd64 ABI)
+  __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI)
 #endif // _LP64
 
 
@@ -659,7 +659,7 @@
     // Always give one monitor to allow us to start interp if sync method.
     // Any additional monitors need a check when moving the expression stack
     const int one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
-  __ load_unsigned_word(rax, size_of_stack);                            // get size of expression stack in words
+  __ load_unsigned_short(rax, size_of_stack);                           // get size of expression stack in words
   __ lea(rax, Address(noreg, rax, Interpreter::stackElementScale(), one_monitor));
   __ lea(rax, Address(rax, rdx, Interpreter::stackElementScale(), overhead_size));
 
@@ -863,13 +863,13 @@
     __ bind(notByte);
     __ cmpl(rdx, stos);
     __ jcc(Assembler::notEqual, notShort);
-    __ load_signed_word(rax, field_address);
+    __ load_signed_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notShort);
     __ cmpl(rdx, ctos);
     __ jcc(Assembler::notEqual, notChar);
-    __ load_unsigned_word(rax, field_address);
+    __ load_unsigned_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notChar);
@@ -937,7 +937,7 @@
   const Register locals = rdi;
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: methodOop
   // rcx: size of parameters
@@ -970,7 +970,7 @@
 #ifdef _LP64
     // duplicate the alignment rsp got after setting stack_base
     __ subptr(rax, frame::arg_reg_save_area_bytes); // windows
-    __ andptr(rax, -16); // must be 16 byte boundry (see amd64 ABI)
+    __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI)
 #endif // _LP64
     __ cmpptr(rax, rsp);
     __ jcc(Assembler::equal, L);
@@ -1062,12 +1062,12 @@
   // allocate space for parameters
   __ movptr(method, STATE(_method));
   __ verify_oop(method);
-  __ load_unsigned_word(t, Address(method, methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(t, Address(method, methodOopDesc::size_of_parameters_offset()));
   __ shll(t, 2);
 #ifdef _LP64
   __ subptr(rsp, t);
   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
-  __ andptr(rsp, -16); // must be 16 byte boundry (see amd64 ABI)
+  __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI)
 #else
   __ addptr(t, 2*wordSize);     // allocate two more slots for JNIEnv and possible mirror
   __ subptr(rsp, t);
@@ -1659,11 +1659,11 @@
   // const Address monitor(rbp, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock));
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: methodOop
   // rcx: size of parameters
-  __ load_unsigned_word(rdx, size_of_locals);                      // get size of locals in words
+  __ load_unsigned_short(rdx, size_of_locals);                     // get size of locals in words
 
   __ subptr(rdx, rcx);                                             // rdx = no. of additional locals
 
@@ -1949,7 +1949,7 @@
   __ movptr(rbx, STATE(_result._to_call._callee));
 
   // callee left args on top of expression stack, remove them
-  __ load_unsigned_word(rcx, Address(rbx, methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(rcx, Address(rbx, methodOopDesc::size_of_parameters_offset()));
   __ lea(rsp, Address(rsp, rcx, Address::times_ptr));
 
   __ movl(rcx, Address(rbx, methodOopDesc::result_index_offset()));
@@ -2119,7 +2119,7 @@
   // Make it look like call_stub calling conventions
 
   // Get (potential) receiver
-  __ load_unsigned_word(rcx, size_of_parameters);                     // get size of parameters in words
+  __ load_unsigned_short(rcx, size_of_parameters);                   // get size of parameters in words
 
   ExternalAddress recursive(CAST_FROM_FN_PTR(address, RecursiveInterpreterActivation));
   __ pushptr(recursive.addr());                                      // make it look good in the debugger
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_32.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_32.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -192,7 +192,7 @@
 void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != index, "must use different registers");
-  load_unsigned_word(index, Address(rsi, bcp_offset));
+  load_unsigned_short(index, Address(rsi, bcp_offset));
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
   shlptr(index, 2); // convert from field index to ConstantPoolCacheEntry index
@@ -202,7 +202,7 @@
 void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != tmp, "must use different register");
-  load_unsigned_word(tmp, Address(rsi, bcp_offset));
+  load_unsigned_short(tmp, Address(rsi, bcp_offset));
   assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
                                // convert from field index to ConstantPoolCacheEntry index
                                // and from word offset to byte offset
@@ -1031,7 +1031,7 @@
 
   // If the mdp is valid, it will point to a DataLayout header which is
   // consistent with the bcp.  The converse is highly probable also.
-  load_unsigned_word(rdx, Address(rcx, in_bytes(DataLayout::bci_offset())));
+  load_unsigned_short(rdx, Address(rcx, in_bytes(DataLayout::bci_offset())));
   addptr(rdx, Address(rbx, methodOopDesc::const_offset()));
   lea(rdx, Address(rdx, constMethodOopDesc::codes_offset()));
   cmpptr(rdx, rsi);
@@ -1512,6 +1512,15 @@
     call_VM_leaf(
       CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), rcx, rbx);
   }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    get_thread(rcx);
+    get_method(rbx);
+    call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      rcx, rbx);
+  }
 }
 
 
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -190,7 +190,7 @@
                                                            int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != index, "must use different registers");
-  load_unsigned_word(index, Address(r13, bcp_offset));
+  load_unsigned_short(index, Address(r13, bcp_offset));
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
@@ -203,7 +203,7 @@
                                                                int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != tmp, "must use different register");
-  load_unsigned_word(tmp, Address(r13, bcp_offset));
+  load_unsigned_short(tmp, Address(r13, bcp_offset));
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
   // and from word offset to byte offset
@@ -1063,8 +1063,8 @@
 
   // If the mdp is valid, it will point to a DataLayout header which is
   // consistent with the bcp.  The converse is highly probable also.
-  load_unsigned_word(c_rarg2,
-                     Address(c_rarg3, in_bytes(DataLayout::bci_offset())));
+  load_unsigned_short(c_rarg2,
+                      Address(c_rarg3, in_bytes(DataLayout::bci_offset())));
   addptr(c_rarg2, Address(rbx, methodOopDesc::const_offset()));
   lea(c_rarg2, Address(c_rarg2, constMethodOopDesc::codes_offset()));
   cmpptr(c_rarg2, r13);
@@ -1593,6 +1593,14 @@
     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
                  r15_thread, c_rarg1);
   }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    get_method(c_rarg1);
+    call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      r15_thread, c_rarg1);
+  }
 }
 
 
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1534,6 +1534,13 @@
          thread, rax);
   }
 
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    __ movoop(rax, JNIHandles::make_local(method()));
+    __ call_VM_leaf(
+         CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+         thread, rax);
+  }
 
   // These are register definitions we need for locking/unlocking
   const Register swap_reg = rax;  // Must use rax, for cmpxchg instruction
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1350,7 +1350,7 @@
     {
       Label L;
       __ mov(rax, rsp);
-      __ andptr(rax, -16); // must be 16 byte boundry (see amd64 ABI)
+      __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI)
       __ cmpptr(rax, rsp);
       __ jcc(Assembler::equal, L);
       __ stop("improperly aligned stack");
@@ -1508,6 +1508,17 @@
     restore_args(masm, total_c_args, c_arg, out_regs);
   }
 
+  // RedefineClasses() tracing support for obsolete method entry
+  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
+    // protect the args we've loaded
+    save_args(masm, total_c_args, c_arg, out_regs);
+    __ movoop(c_rarg1, JNIHandles::make_local(method()));
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      r15_thread, c_rarg1);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+  }
+
   // Lock a synchronized method
 
   // Register definitions used by locking and unlocking
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -662,13 +662,13 @@
     __ bind(notByte);
     __ cmpl(rdx, stos);
     __ jcc(Assembler::notEqual, notShort);
-    __ load_signed_word(rax, field_address);
+    __ load_signed_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notShort);
     __ cmpl(rdx, ctos);
     __ jcc(Assembler::notEqual, notChar);
-    __ load_unsigned_word(rax, field_address);
+    __ load_unsigned_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notChar);
@@ -723,7 +723,7 @@
   const Address access_flags      (rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no expression stack
   // and the arguments are already on the stack and we only add a handful of words
@@ -838,7 +838,7 @@
   // allocate space for parameters
   __ get_method(method);
   __ verify_oop(method);
-  __ load_unsigned_word(t, Address(method, methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(t, Address(method, methodOopDesc::size_of_parameters_offset()));
   __ shlptr(t, Interpreter::logStackElementSize());
   __ addptr(t, 2*wordSize);     // allocate two more slots for JNIEnv and possible mirror
   __ subptr(rsp, t);
@@ -1155,14 +1155,14 @@
   const Address access_flags      (rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx,: methodOop
   // rcx: size of parameters
 
   // rsi: sender_sp (could differ from sp+wordSize if we were called via c2i )
 
-  __ load_unsigned_word(rdx, size_of_locals);       // get size of locals in words
+  __ load_unsigned_short(rdx, size_of_locals);       // get size of locals in words
   __ subl(rdx, rcx);                                // rdx = no. of additional locals
 
   // see if we've got enough room on the stack for locals plus overhead.
@@ -1558,7 +1558,7 @@
     // Compute size of arguments for saving when returning to deoptimized caller
     __ get_method(rax);
     __ verify_oop(rax);
-    __ load_unsigned_word(rax, Address(rax, in_bytes(methodOopDesc::size_of_parameters_offset())));
+    __ load_unsigned_short(rax, Address(rax, in_bytes(methodOopDesc::size_of_parameters_offset())));
     __ shlptr(rax, Interpreter::logStackElementSize());
     __ restore_locals();
     __ subptr(rdi, rax);
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -650,7 +650,7 @@
     __ cmpl(rdx, stos);
     __ jcc(Assembler::notEqual, notShort);
     // stos
-    __ load_signed_word(rax, field_address);
+    __ load_signed_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notShort);
@@ -662,7 +662,7 @@
     __ bind(okay);
 #endif
     // ctos
-    __ load_unsigned_word(rax, field_address);
+    __ load_unsigned_short(rax, field_address);
 
     __ bind(xreturn_path);
 
@@ -702,7 +702,7 @@
   const Address access_flags      (rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no
   // expression stack and the arguments are already on the stack and
@@ -819,14 +819,14 @@
   // allocate space for parameters
   __ get_method(method);
   __ verify_oop(method);
-  __ load_unsigned_word(t,
-                        Address(method,
-                                methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(t,
+                         Address(method,
+                                 methodOopDesc::size_of_parameters_offset()));
   __ shll(t, Interpreter::logStackElementSize());
 
   __ subptr(rsp, t);
   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
-  __ andptr(rsp, -16); // must be 16 byte boundry (see amd64 ABI)
+  __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI)
 
   // get signature handler
   {
@@ -1165,13 +1165,13 @@
   const Address access_flags(rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: methodOop
   // rcx: size of parameters
   // r13: sender_sp (could differ from sp+wordSize if we were called via c2i )
 
-  __ load_unsigned_word(rdx, size_of_locals); // get size of locals in words
+  __ load_unsigned_short(rdx, size_of_locals); // get size of locals in words
   __ subl(rdx, rcx); // rdx = no. of additional locals
 
   // YYY
@@ -1583,7 +1583,7 @@
     // Compute size of arguments for saving when returning to
     // deoptimized caller
     __ get_method(rax);
-    __ load_unsigned_word(rax, Address(rax, in_bytes(methodOopDesc::
+    __ load_unsigned_short(rax, Address(rax, in_bytes(methodOopDesc::
                                                 size_of_parameters_offset())));
     __ shll(rax, Interpreter::logStackElementSize());
     __ restore_locals(); // XXX do we need this?
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -296,7 +296,7 @@
 
 void TemplateTable::sipush() {
   transition(vtos, itos);
-  __ load_unsigned_word(rax, at_bcp(1));
+  __ load_unsigned_short(rax, at_bcp(1));
   __ bswapl(rax);
   __ sarl(rax, 16);
 }
@@ -662,7 +662,7 @@
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
   // can do better code for P5 - may want to improve this at some point
-  __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   __ mov(rax, rbx);
 }
 
@@ -677,7 +677,7 @@
   // rdx: array
   index_check(rdx, rax);
   // rax,: index
-  __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   __ mov(rax, rbx);
 }
 
@@ -687,7 +687,7 @@
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
   // can do better code for P5 - may want to improve this at some point
-  __ load_signed_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
+  __ load_signed_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
   __ mov(rax, rbx);
 }
 
@@ -1586,7 +1586,7 @@
 
   // Handle all the JSR stuff here, then exit.
   // It's much shorter and cleaner than intermingling with the
-  // non-JSR normal-branch stuff occuring below.
+  // non-JSR normal-branch stuff occurring below.
   if (is_jsr) {
     // Pre-load the next target bytecode into EBX
     __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1, 0));
@@ -2310,7 +2310,7 @@
   __ cmpl(flags, ctos );
   __ jcc(Assembler::notEqual, notChar);
 
-  __ load_unsigned_word(rax, lo );
+  __ load_unsigned_short(rax, lo );
   __ push(ctos);
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_cgetfield, rcx, rbx);
@@ -2322,7 +2322,7 @@
   __ cmpl(flags, stos );
   __ jcc(Assembler::notEqual, notShort);
 
-  __ load_signed_word(rax, lo );
+  __ load_signed_short(rax, lo );
   __ push(stos);
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_sgetfield, rcx, rbx);
@@ -2830,8 +2830,8 @@
   // access field
   switch (bytecode()) {
     case Bytecodes::_fast_bgetfield: __ movsbl(rax, lo );                 break;
-    case Bytecodes::_fast_sgetfield: __ load_signed_word(rax, lo );       break;
-    case Bytecodes::_fast_cgetfield: __ load_unsigned_word(rax, lo );     break;
+    case Bytecodes::_fast_sgetfield: __ load_signed_short(rax, lo );      break;
+    case Bytecodes::_fast_cgetfield: __ load_unsigned_short(rax, lo );    break;
     case Bytecodes::_fast_igetfield: __ movl(rax, lo);                    break;
     case Bytecodes::_fast_lgetfield: __ stop("should not be rewritten");  break;
     case Bytecodes::_fast_fgetfield: __ fld_s(lo);                        break;
@@ -3055,35 +3055,44 @@
   // profile this call
   __ profile_virtual_call(rdx, rsi, rdi);
 
-  __ mov(rdi, rdx); // Save klassOop in rdi
-
-  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below");
-  __ movl(rsi, Address(rdx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable
-  __ lea(rdx, Address(rdx, rsi, Address::times_4, base));
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rdx, BytesPerLong);
-  }
-
-  Label entry, search, interface_ok;
-
-  __ jmpb(entry);
-  __ bind(search);
-  __ addptr(rdx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // Check that the entry is non-null.  A null entry means that the receiver
-  // class doesn't implement the interface, and wasn't the same as the
-  // receiver class checked when the interface was resolved.
-  __ push(rdx);
-  __ movptr(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(rdx, rdx);
-  __ jcc(Assembler::notZero, interface_ok);
+  Label no_such_interface, no_such_method;
+
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             rdx, rax, rbx,
+                             // outputs: method, scan temp. reg
+                             rbx, rsi,
+                             no_such_interface);
+
+  // rbx,: methodOop to call
+  // rcx: receiver
+  // Check for abstract method error
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  //       interpreter entry point and a conditional jump to it in case of a null
+  //       method.
+  __ testptr(rbx, rbx);
+  __ jcc(Assembler::zero, no_such_method);
+
+  // do the call
+  // rcx: receiver
+  // rbx,: methodOop
+  __ jump_from_interpreted(rbx, rdx);
+  __ should_not_reach_here();
+
+  // exception handling code follows...
+  // note: must restore interpreter registers to canonical
+  //       state for exception handling to work correctly!
+
+  __ bind(no_such_method);
   // throw exception
-  __ pop(rdx);           // pop saved register first.
+  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
+  __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  __ bind(no_such_interface);
+  // throw exception
   __ pop(rbx);           // pop return address (pushed by prepare_invoke)
   __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
@@ -3091,42 +3100,6 @@
                    InterpreterRuntime::throw_IncompatibleClassChangeError));
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
-  __ bind(interface_ok);
-
-    __ pop(rdx);
-
-    __ cmpptr(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-    __ jcc(Assembler::notEqual, search);
-
-    __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
-    __ addptr(rdx, rdi); // Add offset to klassOop
-    assert(itableMethodEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below");
-    __ movptr(rbx, Address(rdx, rbx, Address::times_ptr));
-    // rbx,: methodOop to call
-    // rcx: receiver
-    // Check for abstract method error
-    // Note: This should be done more efficiently via a throw_abstract_method_error
-    //       interpreter entry point and a conditional jump to it in case of a null
-    //       method.
-    { Label L;
-      __ testptr(rbx, rbx);
-      __ jcc(Assembler::notZero, L);
-      // throw exception
-          // note: must restore interpreter registers to canonical
-          //       state for exception handling to work correctly!
-          __ pop(rbx);           // pop return address (pushed by prepare_invoke)
-          __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
-          __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
-      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
-      // the call_VM checks for exception, so we should never return here.
-      __ should_not_reach_here();
-      __ bind(L);
-    }
-
-  // do the call
-  // rcx: receiver
-  // rbx,: methodOop
-  __ jump_from_interpreted(rbx, rdx);
 }
 
 //----------------------------------------------------------------------------------------------------
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -307,7 +307,7 @@
 
 void TemplateTable::sipush() {
   transition(vtos, itos);
-  __ load_unsigned_word(rax, at_bcp(1));
+  __ load_unsigned_short(rax, at_bcp(1));
   __ bswapl(rax);
   __ sarl(rax, 16);
 }
@@ -645,10 +645,10 @@
   // eax: index
   // rdx: array
   index_check(rdx, rax); // kills rbx
-  __ load_unsigned_word(rax,
-                        Address(rdx, rax,
-                                Address::times_2,
-                                arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rax,
+                         Address(rdx, rax,
+                                 Address::times_2,
+                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 }
 
 // iload followed by caload frequent pair
@@ -663,10 +663,10 @@
   // rdx: array
   __ pop_ptr(rdx);
   index_check(rdx, rax); // kills rbx
-  __ load_unsigned_word(rax,
-                        Address(rdx, rax,
-                                Address::times_2,
-                                arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rax,
+                         Address(rdx, rax,
+                                 Address::times_2,
+                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 }
 
 void TemplateTable::saload() {
@@ -675,10 +675,10 @@
   // eax: index
   // rdx: array
   index_check(rdx, rax); // kills rbx
-  __ load_signed_word(rax,
-                      Address(rdx, rax,
-                              Address::times_2,
-                              arrayOopDesc::base_offset_in_bytes(T_SHORT)));
+  __ load_signed_short(rax,
+                       Address(rdx, rax,
+                               Address::times_2,
+                               arrayOopDesc::base_offset_in_bytes(T_SHORT)));
 }
 
 void TemplateTable::iload(int n) {
@@ -1559,7 +1559,7 @@
 
   // Handle all the JSR stuff here, then exit.
   // It's much shorter and cleaner than intermingling with the non-JSR
-  // normal-branch stuff occuring below.
+  // normal-branch stuff occurring below.
   if (is_jsr) {
     // Pre-load the next target bytecode into rbx
     __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1, 0));
@@ -2276,7 +2276,7 @@
   __ cmpl(flags, ctos);
   __ jcc(Assembler::notEqual, notChar);
   // ctos
-  __ load_unsigned_word(rax, field);
+  __ load_unsigned_short(rax, field);
   __ push(ctos);
   // Rewrite bytecode to be faster
   if (!is_static) {
@@ -2288,7 +2288,7 @@
   __ cmpl(flags, stos);
   __ jcc(Assembler::notEqual, notShort);
   // stos
-  __ load_signed_word(rax, field);
+  __ load_signed_short(rax, field);
   __ push(stos);
   // Rewrite bytecode to be faster
   if (!is_static) {
@@ -2751,10 +2751,10 @@
     __ movsbl(rax, field);
     break;
   case Bytecodes::_fast_sgetfield:
-    __ load_signed_word(rax, field);
+    __ load_signed_short(rax, field);
     break;
   case Bytecodes::_fast_cgetfield:
-    __ load_unsigned_word(rax, field);
+    __ load_unsigned_short(rax, field);
     break;
   case Bytecodes::_fast_fgetfield:
     __ movflt(xmm0, field);
@@ -3010,97 +3010,55 @@
   // profile this call
   __ profile_virtual_call(rdx, r13, r14);
 
-  __ mov(r14, rdx); // Save klassOop in r14
-
-  // Compute start of first itableOffsetEntry (which is at the end of
-  // the vtable)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  // Get length of vtable
-  assert(vtableEntry::size() * wordSize == 8,
-         "adjust the scaling in the code below");
-  __ movl(r13, Address(rdx,
-                       instanceKlass::vtable_length_offset() * wordSize));
-  __ lea(rdx, Address(rdx, r13, Address::times_8, base));
-
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rdx, BytesPerLong);
-  }
-
-  Label entry, search, interface_ok;
-
-  __ jmpb(entry);
-  __ bind(search);
-  __ addptr(rdx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // Check that the entry is non-null.  A null entry means that the
-  // receiver class doesn't implement the interface, and wasn't the
-  // same as the receiver class checked when the interface was
-  // resolved.
-  __ push(rdx);
-  __ movptr(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(rdx, rdx);
-  __ jcc(Assembler::notZero, interface_ok);
+  Label no_such_interface, no_such_method;
+
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             rdx, rax, rbx,
+                             // outputs: method, scan temp. reg
+                             rbx, r13,
+                             no_such_interface);
+
+  // rbx,: methodOop to call
+  // rcx: receiver
+  // Check for abstract method error
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  //       interpreter entry point and a conditional jump to it in case of a null
+  //       method.
+  __ testptr(rbx, rbx);
+  __ jcc(Assembler::zero, no_such_method);
+
+  // do the call
+  // rcx: receiver
+  // rbx,: methodOop
+  __ jump_from_interpreted(rbx, rdx);
+  __ should_not_reach_here();
+
+  // exception handling code follows...
+  // note: must restore interpreter registers to canonical
+  //       state for exception handling to work correctly!
+
+  __ bind(no_such_method);
   // throw exception
-  __ pop(rdx); // pop saved register first.
-  __ pop(rbx); // pop return address (pushed by prepare_invoke)
-  __ restore_bcp(); // r13 must be correct for exception handler (was
-                    // destroyed)
-  __ restore_locals(); // make sure locals pointer is correct as well
-                       // (was destroyed)
+  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
+  __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  __ bind(no_such_interface);
+  // throw exception
+  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
+  __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                    InterpreterRuntime::throw_IncompatibleClassChangeError));
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
-  __ bind(interface_ok);
-
-  __ pop(rdx);
-
-  __ cmpptr(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ jcc(Assembler::notEqual, search);
-
-  __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
-
-  __ addptr(rdx, r14); // Add offset to klassOop
-  assert(itableMethodEntry::size() * wordSize == 8,
-         "adjust the scaling in the code below");
-  __ movptr(rbx, Address(rdx, rbx, Address::times_8));
-  // rbx: methodOop to call
-  // rcx: receiver
-  // Check for abstract method error
-  // Note: This should be done more efficiently via a
-  // throw_abstract_method_error interpreter entry point and a
-  // conditional jump to it in case of a null method.
-  {
-    Label L;
-    __ testptr(rbx, rbx);
-    __ jcc(Assembler::notZero, L);
-    // throw exception
-    // note: must restore interpreter registers to canonical
-    //       state for exception handling to work correctly!
-    __ pop(rbx);  // pop return address (pushed by prepare_invoke)
-    __ restore_bcp(); // r13 must be correct for exception handler
-                      // (was destroyed)
-    __ restore_locals(); // make sure locals pointer is correct as
-                         // well (was destroyed)
-    __ call_VM(noreg,
-               CAST_FROM_FN_PTR(address,
-                             InterpreterRuntime::throw_AbstractMethodError));
-    // the call_VM checks for exception, so we should never return here.
-    __ should_not_reach_here();
-    __ bind(L);
-  }
-
-  __ movptr(rcx, Address(rbx, methodOopDesc::interpreter_entry_offset()));
-
-  // do the call
-  // rcx: receiver
-  // rbx: methodOop
-  __ jump_from_interpreted(rbx, rdx);
+  return;
 }
 
+
 //-----------------------------------------------------------------------------
 // Allocation
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,514 @@
+/*
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_vm_version_x86.cpp.incl"
+
+
+int VM_Version::_cpu;
+int VM_Version::_model;
+int VM_Version::_stepping;
+int VM_Version::_cpuFeatures;
+const char*           VM_Version::_features_str = "";
+VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
+
+static BufferBlob* stub_blob;
+static const int stub_size = 300;
+
+extern "C" {
+  typedef void (*getPsrInfo_stub_t)(void*);
+}
+static getPsrInfo_stub_t getPsrInfo_stub = NULL;
+
+
+class VM_Version_StubGenerator: public StubCodeGenerator {
+ public:
+
+  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+
+  address generate_getPsrInfo() {
+    // Flags to test CPU type.
+    const uint32_t EFL_AC           = 0x40000;
+    const uint32_t EFL_ID           = 0x200000;
+    // Values for when we don't have a CPUID instruction.
+    const int      CPU_FAMILY_SHIFT = 8;
+    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
+    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
+
+    Label detect_486, cpu486, detect_586, std_cpuid1;
+    Label ext_cpuid1, ext_cpuid5, done;
+
+    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
+#   define __ _masm->
+
+    address start = __ pc();
+
+    //
+    // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
+    //
+    // LP64: rcx and rdx are first and second argument registers on windows
+
+    __ push(rbp);
+#ifdef _LP64
+    __ mov(rbp, c_rarg0); // cpuid_info address
+#else
+    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
+#endif
+    __ push(rbx);
+    __ push(rsi);
+    __ pushf();          // preserve rbx, and flags
+    __ pop(rax);
+    __ push(rax);
+    __ mov(rcx, rax);
+    //
+    // if we are unable to change the AC flag, we have a 386
+    //
+    __ xorl(rax, EFL_AC);
+    __ push(rax);
+    __ popf();
+    __ pushf();
+    __ pop(rax);
+    __ cmpptr(rax, rcx);
+    __ jccb(Assembler::notEqual, detect_486);
+
+    __ movl(rax, CPU_FAMILY_386);
+    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
+    __ jmp(done);
+
+    //
+    // If we are unable to change the ID flag, we have a 486 which does
+    // not support the "cpuid" instruction.
+    //
+    __ bind(detect_486);
+    __ mov(rax, rcx);
+    __ xorl(rax, EFL_ID);
+    __ push(rax);
+    __ popf();
+    __ pushf();
+    __ pop(rax);
+    __ cmpptr(rcx, rax);
+    __ jccb(Assembler::notEqual, detect_586);
+
+    __ bind(cpu486);
+    __ movl(rax, CPU_FAMILY_486);
+    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
+    __ jmp(done);
+
+    //
+    // At this point, we have a chip which supports the "cpuid" instruction
+    //
+    __ bind(detect_586);
+    __ xorl(rax, rax);
+    __ cpuid();
+    __ orl(rax, rax);
+    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
+                                        // value of at least 1, we give up and
+                                        // assume a 486
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
+    __ jccb(Assembler::belowEqual, std_cpuid1);
+
+    //
+    // cpuid(0x4) Deterministic cache params
+    //
+    __ movl(rax, 4);
+    __ xorl(rcx, rcx);   // L1 cache
+    __ cpuid();
+    __ push(rax);
+    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
+    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
+    __ pop(rax);
+    __ jccb(Assembler::equal, std_cpuid1);
+
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    //
+    // Standard cpuid(0x1)
+    //
+    __ bind(std_cpuid1);
+    __ movl(rax, 1);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    __ movl(rax, 0x80000000);
+    __ cpuid();
+    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
+    __ jcc(Assembler::belowEqual, done);
+    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid1);
+    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid5);
+    //
+    // Extended cpuid(0x80000008)
+    //
+    __ movl(rax, 0x80000008);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    //
+    // Extended cpuid(0x80000005)
+    //
+    __ bind(ext_cpuid5);
+    __ movl(rax, 0x80000005);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    //
+    // Extended cpuid(0x80000001)
+    //
+    __ bind(ext_cpuid1);
+    __ movl(rax, 0x80000001);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    //
+    // return
+    //
+    __ bind(done);
+    __ popf();
+    __ pop(rsi);
+    __ pop(rbx);
+    __ pop(rbp);
+    __ ret(0);
+
+#   undef __
+
+    return start;
+  };
+};
+
+
+void VM_Version::get_processor_features() {
+
+  _cpu = 4; // 486 by default
+  _model = 0;
+  _stepping = 0;
+  _cpuFeatures = 0;
+  _logical_processors_per_package = 1;
+
+  if (!Use486InstrsOnly) {
+    // Get raw processor info
+    getPsrInfo_stub(&_cpuid_info);
+    assert_is_initialized();
+    _cpu = extended_cpu_family();
+    _model = extended_cpu_model();
+    _stepping = cpu_stepping();
+
+    if (cpu_family() > 4) { // it supports CPUID
+      _cpuFeatures = feature_flags();
+      // Logical processors are only available on P4s and above,
+      // and only if hyperthreading is available.
+      _logical_processors_per_package = logical_processor_count();
+    }
+  }
+
+  _supports_cx8 = supports_cmpxchg8();
+
+#ifdef _LP64
+  // OS should support SSE for x64 and hardware should support at least SSE2.
+  if (!VM_Version::supports_sse2()) {
+    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
+  }
+#endif
+
+  // If the OS doesn't support SSE, we can't use this feature even if the HW does
+  if (!os::supports_sse())
+    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
+
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
+
+  if (UseSSE < 3) {
+    _cpuFeatures &= ~CPU_SSE3;
+    _cpuFeatures &= ~CPU_SSSE3;
+    _cpuFeatures &= ~CPU_SSE4A;
+  }
+
+  if (UseSSE < 2)
+    _cpuFeatures &= ~CPU_SSE2;
+
+  if (UseSSE < 1)
+    _cpuFeatures &= ~CPU_SSE;
+
+  if (logical_processors_per_package() == 1) {
+    // HT processor could be installed on a system which doesn't support HT.
+    _cpuFeatures &= ~CPU_HT;
+  }
+
+  char buf[256];
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               cores_per_cpu(), threads_per_core(),
+               cpu_family(), _model, _stepping,
+               (supports_cmov() ? ", cmov" : ""),
+               (supports_cmpxchg8() ? ", cx8" : ""),
+               (supports_fxsr() ? ", fxsr" : ""),
+               (supports_mmx()  ? ", mmx"  : ""),
+               (supports_sse()  ? ", sse"  : ""),
+               (supports_sse2() ? ", sse2" : ""),
+               (supports_sse3() ? ", sse3" : ""),
+               (supports_ssse3()? ", ssse3": ""),
+               (supports_sse4_1() ? ", sse4.1" : ""),
+               (supports_sse4_2() ? ", sse4.2" : ""),
+               (supports_mmx_ext() ? ", mmxext" : ""),
+               (supports_3dnow()   ? ", 3dnow"  : ""),
+               (supports_3dnow2()  ? ", 3dnowext" : ""),
+               (supports_sse4a()   ? ", sse4a": ""),
+               (supports_ht() ? ", ht": ""));
+  _features_str = strdup(buf);
+
+  // UseSSE is set to the smaller of what hardware supports and what
+  // the command line requires.  I.e., you cannot set UseSSE to 2 on
+  // older Pentiums which do not support it.
+  if( UseSSE > 4 ) UseSSE=4;
+  if( UseSSE < 0 ) UseSSE=0;
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
+    UseSSE = MIN2((intx)3,UseSSE);
+  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
+    UseSSE = MIN2((intx)2,UseSSE);
+  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
+    UseSSE = MIN2((intx)1,UseSSE);
+  if( !supports_sse () ) // Drop to 0 if no SSE  support
+    UseSSE = 0;
+
+  // On new cpus instructions which update whole XMM register should be used
+  // to prevent partial register stall due to dependencies on high half.
+  //
+  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
+  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
+  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
+  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
+
+  if( is_amd() ) { // AMD cpus specific settings
+    if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
+      // Use it on new AMD cpus starting from Opteron.
+      UseAddressNop = true;
+    }
+    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
+      // Use it on new AMD cpus starting from Opteron.
+      UseNewLongLShift = true;
+    }
+    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
+      if( supports_sse4a() ) {
+        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
+      } else {
+        UseXmmLoadAndClearUpper = false;
+      }
+    }
+    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
+      if( supports_sse4a() ) {
+        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
+      } else {
+        UseXmmRegToRegMoveAll = false;
+      }
+    }
+    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
+      if( supports_sse4a() ) {
+        UseXmmI2F = true;
+      } else {
+        UseXmmI2F = false;
+      }
+    }
+    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
+      if( supports_sse4a() ) {
+        UseXmmI2D = true;
+      } else {
+        UseXmmI2D = false;
+      }
+    }
+  }
+
+  if( is_intel() ) { // Intel cpus specific settings
+    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
+      UseStoreImmI16 = false; // don't use it on Intel cpus
+    }
+    if( cpu_family() == 6 || cpu_family() == 15 ) {
+      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
+        // Use it on all Intel cpus starting from PentiumPro
+        UseAddressNop = true;
+      }
+    }
+    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
+      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
+    }
+    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
+      if( supports_sse3() ) {
+        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
+      } else {
+        UseXmmRegToRegMoveAll = false;
+      }
+    }
+    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
+#ifdef COMPILER2
+      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
+        // For new Intel cpus do the next optimization:
+        // don't align the beginning of a loop if there are enough instructions
+        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
+        // in current fetch line (OptoLoopAlignment) or the padding
+        // is big (> MaxLoopPad).
+        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
+        // generated NOP instructions. 11 is the largest size of one
+        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
+        MaxLoopPad = 11;
+      }
+#endif // COMPILER2
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+      }
+      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
+    }
+  }
+
+  assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
+  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
+
+  // set valid Prefetch instruction
+  if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
+  if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
+  if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
+  if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3;
+
+  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
+  if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
+  if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
+  if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3;
+
+  // Allocation prefetch settings
+  intx cache_line_size = L1_data_cache_line_size();
+  if( cache_line_size > AllocatePrefetchStepSize )
+    AllocatePrefetchStepSize = cache_line_size;
+  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
+    AllocatePrefetchLines = 3; // Optimistic value
+  assert(AllocatePrefetchLines > 0, "invalid value");
+  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
+    AllocatePrefetchLines = 1; // Conservative value
+
+  AllocatePrefetchDistance = allocate_prefetch_distance();
+  AllocatePrefetchStyle    = allocate_prefetch_style();
+
+  if( AllocatePrefetchStyle == 2 && is_intel() &&
+      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
+#ifdef _LP64
+    AllocatePrefetchDistance = 384;
+#else
+    AllocatePrefetchDistance = 320;
+#endif
+  }
+  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
+
+#ifdef _LP64
+  // Prefetch settings
+  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
+  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
+  PrefetchFieldsAhead         = prefetch_fields_ahead();
+#endif
+
+#ifndef PRODUCT
+  if (PrintMiscellaneous && Verbose) {
+    tty->print_cr("Logical CPUs per core: %u",
+                  logical_processors_per_package());
+    tty->print_cr("UseSSE=%d",UseSSE);
+    tty->print("Allocation: ");
+    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) {
+      tty->print_cr("no prefetching");
+    } else {
+      if (UseSSE == 0 && supports_3dnow()) {
+        tty->print("PREFETCHW");
+      } else if (UseSSE >= 1) {
+        if (AllocatePrefetchInstr == 0) {
+          tty->print("PREFETCHNTA");
+        } else if (AllocatePrefetchInstr == 1) {
+          tty->print("PREFETCHT0");
+        } else if (AllocatePrefetchInstr == 2) {
+          tty->print("PREFETCHT2");
+        } else if (AllocatePrefetchInstr == 3) {
+          tty->print("PREFETCHW");
+        }
+      }
+      if (AllocatePrefetchLines > 1) {
+        tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
+      } else {
+        tty->print_cr(" %d, one line", AllocatePrefetchDistance);
+      }
+    }
+
+    if (PrefetchCopyIntervalInBytes > 0) {
+      tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
+    }
+    if (PrefetchScanIntervalInBytes > 0) {
+      tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
+    }
+    if (PrefetchFieldsAhead > 0) {
+      tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
+    }
+  }
+#endif // !PRODUCT
+}
+
+void VM_Version::initialize() {
+  ResourceMark rm;
+  // Making this stub must be FIRST use of assembler
+
+  stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
+  if (stub_blob == NULL) {
+    vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
+  }
+  CodeBuffer c(stub_blob->instructions_begin(),
+               stub_blob->instructions_size());
+  VM_Version_StubGenerator g(&c);
+  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
+                                   g.generate_getPsrInfo());
+
+  get_processor_features();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,459 @@
+/*
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class VM_Version : public Abstract_VM_Version {
+public:
+  // cpuid result register layouts.  These are all unions of a uint32_t
+  // (in case anyone wants access to the register as a whole) and a bitfield.
+
+  union StdCpuid1Eax {
+    uint32_t value;
+    struct {
+      uint32_t stepping   : 4,
+               model      : 4,
+               family     : 4,
+               proc_type  : 2,
+                          : 2,
+               ext_model  : 4,
+               ext_family : 8,
+                          : 4;
+    } bits;
+  };
+
+  union StdCpuid1Ebx { // example, unused
+    uint32_t value;
+    struct {
+      uint32_t brand_id         : 8,
+               clflush_size     : 8,
+               threads_per_cpu  : 8,
+               apic_id          : 8;
+    } bits;
+  };
+
+  union StdCpuid1Ecx {
+    uint32_t value;
+    struct {
+      uint32_t sse3     : 1,
+                        : 2,
+               monitor  : 1,
+                        : 1,
+               vmx      : 1,
+                        : 1,
+               est      : 1,
+                        : 1,
+               ssse3    : 1,
+               cid      : 1,
+                        : 2,
+               cmpxchg16: 1,
+                        : 4,
+               dca      : 1,
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
+    } bits;
+  };
+
+  union StdCpuid1Edx {
+    uint32_t value;
+    struct {
+      uint32_t          : 4,
+               tsc      : 1,
+                        : 3,
+               cmpxchg8 : 1,
+                        : 6,
+               cmov     : 1,
+                        : 7,
+               mmx      : 1,
+               fxsr     : 1,
+               sse      : 1,
+               sse2     : 1,
+                        : 1,
+               ht       : 1,
+                        : 3;
+    } bits;
+  };
+
+  union DcpCpuid4Eax {
+    uint32_t value;
+    struct {
+      uint32_t cache_type    : 5,
+                             : 21,
+               cores_per_cpu : 6;
+    } bits;
+  };
+
+  union DcpCpuid4Ebx {
+    uint32_t value;
+    struct {
+      uint32_t L1_line_size  : 12,
+               partitions    : 10,
+               associativity : 10;
+    } bits;
+  };
+
+  union ExtCpuid1Ecx {
+    uint32_t value;
+    struct {
+      uint32_t LahfSahf     : 1,
+               CmpLegacy    : 1,
+                            : 4,
+               abm          : 1,
+               sse4a        : 1,
+               misalignsse  : 1,
+               prefetchw    : 1,
+                            : 22;
+    } bits;
+  };
+
+  union ExtCpuid1Edx {
+    uint32_t value;
+    struct {
+      uint32_t           : 22,
+               mmx_amd   : 1,
+               mmx       : 1,
+               fxsr      : 1,
+                         : 4,
+               long_mode : 1,
+               tdnow2    : 1,
+               tdnow     : 1;
+    } bits;
+  };
+
+  union ExtCpuid5Ex {
+    uint32_t value;
+    struct {
+      uint32_t L1_line_size : 8,
+               L1_tag_lines : 8,
+               L1_assoc     : 8,
+               L1_size      : 8;
+    } bits;
+  };
+
+  union ExtCpuid8Ecx {
+    uint32_t value;
+    struct {
+      uint32_t cores_per_cpu : 8,
+                             : 24;
+    } bits;
+  };
+
+protected:
+   static int _cpu;
+   static int _model;
+   static int _stepping;
+   static int _cpuFeatures;     // features returned by the "cpuid" instruction
+                                // 0 if this instruction is not available
+   static const char* _features_str;
+
+   enum {
+     CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
+     CPU_CMOV   = (1 << 1),
+     CPU_FXSR   = (1 << 2),
+     CPU_HT     = (1 << 3),
+     CPU_MMX    = (1 << 4),
+     CPU_3DNOW  = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
+     CPU_SSE    = (1 << 6),
+     CPU_SSE2   = (1 << 7),
+     CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
+     CPU_SSSE3  = (1 << 9),
+     CPU_SSE4A  = (1 << 10),
+     CPU_SSE4_1 = (1 << 11),
+     CPU_SSE4_2 = (1 << 12)
+   } cpuFeatureFlags;
+
+  // cpuid information block.  All info derived from executing cpuid with
+  // various function numbers is stored here.  Intel and AMD info is
+  // merged in this block: accessor methods disentangle it.
+  //
+  // The info block is laid out in subblocks of 4 dwords corresponding to
+  // eax, ebx, ecx and edx, whether or not they contain anything useful.
+  struct CpuidInfo {
+    // cpuid function 0
+    uint32_t std_max_function;
+    uint32_t std_vendor_name_0;
+    uint32_t std_vendor_name_1;
+    uint32_t std_vendor_name_2;
+
+    // cpuid function 1
+    StdCpuid1Eax std_cpuid1_eax;
+    StdCpuid1Ebx std_cpuid1_ebx;
+    StdCpuid1Ecx std_cpuid1_ecx;
+    StdCpuid1Edx std_cpuid1_edx;
+
+    // cpuid function 4 (deterministic cache parameters)
+    DcpCpuid4Eax dcp_cpuid4_eax;
+    DcpCpuid4Ebx dcp_cpuid4_ebx;
+    uint32_t     dcp_cpuid4_ecx; // unused currently
+    uint32_t     dcp_cpuid4_edx; // unused currently
+
+    // cpuid function 0x80000000 // example, unused
+    uint32_t ext_max_function;
+    uint32_t ext_vendor_name_0;
+    uint32_t ext_vendor_name_1;
+    uint32_t ext_vendor_name_2;
+
+    // cpuid function 0x80000001
+    uint32_t     ext_cpuid1_eax; // reserved
+    uint32_t     ext_cpuid1_ebx; // reserved
+    ExtCpuid1Ecx ext_cpuid1_ecx;
+    ExtCpuid1Edx ext_cpuid1_edx;
+
+    // cpuid functions 0x80000002 thru 0x80000004: example, unused
+    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
+    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
+    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
+
+    // cpuid function 0x80000005 //AMD L1, Intel reserved
+    uint32_t     ext_cpuid5_eax; // unused currently
+    uint32_t     ext_cpuid5_ebx; // reserved
+    ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
+    ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
+
+    // cpuid function 0x80000008
+    uint32_t     ext_cpuid8_eax; // unused currently
+    uint32_t     ext_cpuid8_ebx; // reserved
+    ExtCpuid8Ecx ext_cpuid8_ecx;
+    uint32_t     ext_cpuid8_edx; // reserved
+  };
+
+  // The actual cpuid info block
+  static CpuidInfo _cpuid_info;
+
+  // Extractors and predicates
+  static uint32_t extended_cpu_family() {
+    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
+    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
+    return result;
+  }
+  static uint32_t extended_cpu_model() {
+    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
+    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
+    return result;
+  }
+  static uint32_t cpu_stepping() {
+    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
+    return result;
+  }
+  static uint logical_processor_count() {
+    uint result = threads_per_core();
+    return result;
+  }
+  static uint32_t feature_flags() {
+    uint32_t result = 0;
+    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
+      result |= CPU_CX8;
+    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
+      result |= CPU_CMOV;
+    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() &&
+        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)
+      result |= CPU_FXSR;
+    // HT flag is set for multi-core processors also.
+    if (threads_per_core() > 1)
+      result |= CPU_HT;
+    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
+        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
+      result |= CPU_MMX;
+    if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
+      result |= CPU_3DNOW;
+    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
+      result |= CPU_SSE;
+    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
+      result |= CPU_SSE2;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
+      result |= CPU_SSE3;
+    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
+      result |= CPU_SSSE3;
+    if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
+      result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
+    return result;
+  }
+
+  static void get_processor_features();
+
+public:
+  // Offsets for cpuid asm stub
+  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
+  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
+  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
+  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
+  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
+  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+
+  // Initialization
+  static void initialize();
+
+  // Asserts
+  static void assert_is_initialized() {
+    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
+  }
+
+  //
+  // Processor family:
+  //       3   -  386
+  //       4   -  486
+  //       5   -  Pentium
+  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
+  //              Pentium M, Core Solo, Core Duo, Core2 Duo
+  //    family 6 model:   9,        13,       14,        15
+  //    0x0f   -  Pentium 4, Opteron
+  //
+  // Note: The cpu family should be used to select between
+  //       instruction sequences which are valid on all Intel
+  //       processors.  Use the feature test functions below to
+  //       determine whether a particular instruction is supported.
+  //
+  static int  cpu_family()        { return _cpu;}
+  static bool is_P6()             { return cpu_family() >= 6; }
+
+  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
+  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
+
+  static uint cores_per_cpu()  {
+    uint result = 1;
+    if (is_intel()) {
+      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+    } else if (is_amd()) {
+      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
+    }
+    return result;
+  }
+
+  static uint threads_per_core()  {
+    uint result = 1;
+    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
+      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+               cores_per_cpu();
+    }
+    return result;
+  }
+
+  static intx L1_data_cache_line_size()  {
+    intx result = 0;
+    if (is_intel()) {
+      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
+    } else if (is_amd()) {
+      result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
+    }
+    if (result < 32) // not defined ?
+      result = 32;   // 32 bytes by default on x86 and other x64
+    return result;
+  }
+
+  //
+  // Feature identification
+  //
+  static bool supports_cpuid()    { return _cpuFeatures  != 0; }
+  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
+  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
+  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
+  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
+  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
+  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
+  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
+  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
+  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
+  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
+  //
+  // AMD features
+  //
+  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
+  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
+  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
+  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
+
+  static bool supports_compare_and_exchange() { return true; }
+
+  static const char* cpu_features()           { return _features_str; }
+
+  static intx allocate_prefetch_distance() {
+    // This method should be called before allocate_prefetch_style().
+    //
+    // Hardware prefetching (distance/size in bytes):
+    // Pentium 3 -  64 /  32
+    // Pentium 4 - 256 / 128
+    // Athlon    -  64 /  32 ????
+    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
+    // Core      - 128 /  64
+    //
+    // Software prefetching (distance in bytes / instruction with best score):
+    // Pentium 3 - 128 / prefetchnta
+    // Pentium 4 - 512 / prefetchnta
+    // Athlon    - 128 / prefetchnta
+    // Opteron   - 256 / prefetchnta
+    // Core      - 256 / prefetchnta
+    // It will be used only when AllocatePrefetchStyle > 0
+
+    intx count = AllocatePrefetchDistance;
+    if (count < 0) {   // default ?
+      if (is_amd()) {  // AMD
+        if (supports_sse2())
+          count = 256; // Opteron
+        else
+          count = 128; // Athlon
+      } else {         // Intel
+        if (supports_sse2())
+          if (cpu_family() == 6) {
+            count = 256; // Pentium M, Core, Core2
+          } else {
+            count = 512; // Pentium 4
+          }
+        else
+          count = 128; // Pentium 3 (and all other old CPUs)
+      }
+    }
+    return count;
+  }
+  static intx allocate_prefetch_style() {
+    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
+    // Return 0 if AllocatePrefetchDistance was not defined.
+    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
+  }
+
+  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
+  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
+  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
+  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
+
+  // gc copy/scan is disabled if prefetchw isn't supported, because
+  // Prefetch::write emits an inlined prefetchw on Linux.
+  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
+  // The used prefetcht0 instruction works for both amd64 and em64t.
+  static intx prefetch_copy_interval_in_bytes() {
+    intx interval = PrefetchCopyIntervalInBytes;
+    return interval >= 0 ? interval : 576;
+  }
+  static intx prefetch_scan_interval_in_bytes() {
+    intx interval = PrefetchScanIntervalInBytes;
+    return interval >= 0 ? interval : 576;
+  }
+  static intx prefetch_fields_ahead() {
+    intx count = PrefetchFieldsAhead;
+    return count >= 0 ? count : 1;
+  }
+};
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,472 +0,0 @@
-/*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-# include "incls/_precompiled.incl"
-# include "incls/_vm_version_x86_32.cpp.incl"
-
-
-int VM_Version::_cpu;
-int VM_Version::_model;
-int VM_Version::_stepping;
-int VM_Version::_cpuFeatures;
-const char*           VM_Version::_features_str = "";
-VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
-
-static BufferBlob* stub_blob;
-static const int stub_size = 300;
-
-extern "C" {
-  typedef void (*getPsrInfo_stub_t)(void*);
-}
-static getPsrInfo_stub_t getPsrInfo_stub = NULL;
-
-
-class VM_Version_StubGenerator: public StubCodeGenerator {
- public:
-
-  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
-
-  address generate_getPsrInfo() {
-    // Flags to test CPU type.
-    const uint32_t EFL_AC           = 0x40000;
-    const uint32_t EFL_ID           = 0x200000;
-    // Values for when we don't have a CPUID instruction.
-    const int      CPU_FAMILY_SHIFT = 8;
-    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
-    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
-
-    Label detect_486, cpu486, detect_586, std_cpuid1;
-    Label ext_cpuid1, ext_cpuid5, done;
-
-    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
-#   define __ _masm->
-
-    address start = __ pc();
-
-    //
-    // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
-    //
-    __ push(rbp);
-    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
-    __ push(rbx);
-    __ push(rsi);
-    __ pushf();          // preserve rbx, and flags
-    __ pop(rax);
-    __ push(rax);
-    __ mov(rcx, rax);
-    //
-    // if we are unable to change the AC flag, we have a 386
-    //
-    __ xorl(rax, EFL_AC);
-    __ push(rax);
-    __ popf();
-    __ pushf();
-    __ pop(rax);
-    __ cmpptr(rax, rcx);
-    __ jccb(Assembler::notEqual, detect_486);
-
-    __ movl(rax, CPU_FAMILY_386);
-    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
-    __ jmp(done);
-
-    //
-    // If we are unable to change the ID flag, we have a 486 which does
-    // not support the "cpuid" instruction.
-    //
-    __ bind(detect_486);
-    __ mov(rax, rcx);
-    __ xorl(rax, EFL_ID);
-    __ push(rax);
-    __ popf();
-    __ pushf();
-    __ pop(rax);
-    __ cmpptr(rcx, rax);
-    __ jccb(Assembler::notEqual, detect_586);
-
-    __ bind(cpu486);
-    __ movl(rax, CPU_FAMILY_486);
-    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
-    __ jmp(done);
-
-    //
-    // at this point, we have a chip which supports the "cpuid" instruction
-    //
-    __ bind(detect_586);
-    __ xorptr(rax, rax);
-    __ cpuid();
-    __ orptr(rax, rax);
-    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
-                                        // value of at least 1, we give up and
-                                        // assume a 486
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
-    __ jccb(Assembler::belowEqual, std_cpuid1);
-
-    //
-    // cpuid(0x4) Deterministic cache params
-    //
-    __ movl(rax, 4);     // and rcx already set to 0x0
-    __ xorl(rcx, rcx);
-    __ cpuid();
-    __ push(rax);
-    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
-    __ orl(rax, rax);    // rax,[4:0] == 0 indicates invalid cache
-    __ pop(rax);
-    __ jccb(Assembler::equal, std_cpuid1);
-
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // Standard cpuid(0x1)
-    //
-    __ bind(std_cpuid1);
-    __ movl(rax, 1);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    __ movl(rax, 0x80000000);
-    __ cpuid();
-    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
-    __ jcc(Assembler::belowEqual, done);
-    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid1);
-    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid5);
-    //
-    // Extended cpuid(0x80000008)
-    //
-    __ movl(rax, 0x80000008);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // Extended cpuid(0x80000005)
-    //
-    __ bind(ext_cpuid5);
-    __ movl(rax, 0x80000005);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // Extended cpuid(0x80000001)
-    //
-    __ bind(ext_cpuid1);
-    __ movl(rax, 0x80000001);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // return
-    //
-    __ bind(done);
-    __ popf();
-    __ pop(rsi);
-    __ pop(rbx);
-    __ pop(rbp);
-    __ ret(0);
-
-#   undef __
-
-    return start;
-  };
-};
-
-
-void VM_Version::get_processor_features() {
-
-  _cpu = 4; // 486 by default
-  _model = 0;
-  _stepping = 0;
-  _cpuFeatures = 0;
-  _logical_processors_per_package = 1;
-  if (!Use486InstrsOnly) {
-    // Get raw processor info
-    getPsrInfo_stub(&_cpuid_info);
-    assert_is_initialized();
-    _cpu = extended_cpu_family();
-    _model = extended_cpu_model();
-    _stepping = cpu_stepping();
-    if (cpu_family() > 4) { // it supports CPUID
-      _cpuFeatures = feature_flags();
-      // Logical processors are only available on P4s and above,
-      // and only if hyperthreading is available.
-      _logical_processors_per_package = logical_processor_count();
-    }
-  }
-  _supports_cx8 = supports_cmpxchg8();
-  // if the OS doesn't support SSE, we can't use this feature even if the HW does
-  if( !os::supports_sse())
-    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
-  if (UseSSE < 4) {
-    _cpuFeatures &= ~CPU_SSE4_1;
-    _cpuFeatures &= ~CPU_SSE4_2;
-  }
-  if (UseSSE < 3) {
-    _cpuFeatures &= ~CPU_SSE3;
-    _cpuFeatures &= ~CPU_SSSE3;
-    _cpuFeatures &= ~CPU_SSE4A;
-  }
-  if (UseSSE < 2)
-    _cpuFeatures &= ~CPU_SSE2;
-  if (UseSSE < 1)
-    _cpuFeatures &= ~CPU_SSE;
-
-  if (logical_processors_per_package() == 1) {
-    // HT processor could be installed on a system which doesn't support HT.
-    _cpuFeatures &= ~CPU_HT;
-  }
-
-  char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
-               cores_per_cpu(), threads_per_core(),
-               cpu_family(), _model, _stepping,
-               (supports_cmov() ? ", cmov" : ""),
-               (supports_cmpxchg8() ? ", cx8" : ""),
-               (supports_fxsr() ? ", fxsr" : ""),
-               (supports_mmx()  ? ", mmx"  : ""),
-               (supports_sse()  ? ", sse"  : ""),
-               (supports_sse2() ? ", sse2" : ""),
-               (supports_sse3() ? ", sse3" : ""),
-               (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4_1() ? ", sse4.1" : ""),
-               (supports_sse4_2() ? ", sse4.2" : ""),
-               (supports_mmx_ext() ? ", mmxext" : ""),
-               (supports_3dnow()   ? ", 3dnow"  : ""),
-               (supports_3dnow2()  ? ", 3dnowext" : ""),
-               (supports_sse4a()   ? ", sse4a": ""),
-               (supports_ht() ? ", ht": ""));
-  _features_str = strdup(buf);
-
-  // UseSSE is set to the smaller of what hardware supports and what
-  // the command line requires.  I.e., you cannot set UseSSE to 2 on
-  // older Pentiums which do not support it.
-  if( UseSSE > 4 ) UseSSE=4;
-  if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
-    UseSSE = MIN2((intx)3,UseSSE);
-  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
-    UseSSE = MIN2((intx)2,UseSSE);
-  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
-    UseSSE = MIN2((intx)1,UseSSE);
-  if( !supports_sse () ) // Drop to 0 if no SSE  support
-    UseSSE = 0;
-
-  // On new cpus instructions which update whole XMM register should be used
-  // to prevent partial register stall due to dependencies on high half.
-  //
-  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
-  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
-  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
-  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
-
-  if( is_amd() ) { // AMD cpus specific settings
-    if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
-      // Use it on new AMD cpus starting from Opteron.
-      UseAddressNop = true;
-    }
-    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
-      // Use it on new AMD cpus starting from Opteron.
-      UseNewLongLShift = true;
-    }
-    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
-      if( supports_sse4a() ) {
-        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
-      } else {
-        UseXmmLoadAndClearUpper = false;
-      }
-    }
-    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
-      if( supports_sse4a() ) {
-        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
-      } else {
-        UseXmmRegToRegMoveAll = false;
-      }
-    }
-    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
-      if( supports_sse4a() ) {
-        UseXmmI2F = true;
-      } else {
-        UseXmmI2F = false;
-      }
-    }
-    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
-      if( supports_sse4a() ) {
-        UseXmmI2D = true;
-      } else {
-        UseXmmI2D = false;
-      }
-    }
-  }
-
-  if( is_intel() ) { // Intel cpus specific settings
-    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
-      UseStoreImmI16 = false; // don't use it on Intel cpus
-    }
-    if( cpu_family() == 6 || cpu_family() == 15 ) {
-      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
-        // Use it on all Intel cpus starting from PentiumPro
-        UseAddressNop = true;
-      }
-    }
-    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
-      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
-    }
-    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
-      if( supports_sse3() ) {
-        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
-      } else {
-        UseXmmRegToRegMoveAll = false;
-      }
-    }
-    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
-#ifdef COMPILER2
-      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
-        // For new Intel cpus do the next optimization:
-        // don't align the beginning of a loop if there are enough instructions
-        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
-        // in current fetch line (OptoLoopAlignment) or the padding
-        // is big (> MaxLoopPad).
-        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
-        // generated NOP instructions. 11 is the largest size of one
-        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
-        MaxLoopPad = 11;
-      }
-#endif // COMPILER2
-      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
-        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
-      }
-      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
-        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
-          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
-        }
-      }
-    }
-  }
-
-  assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
-  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
-
-  // set valid Prefetch instruction
-  if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
-  if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
-  if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
-  if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3;
-
-  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
-  if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
-  if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
-  if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3;
-
-  // Allocation prefetch settings
-  intx cache_line_size = L1_data_cache_line_size();
-  if( cache_line_size > AllocatePrefetchStepSize )
-    AllocatePrefetchStepSize = cache_line_size;
-  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
-    AllocatePrefetchLines = 3; // Optimistic value
-  assert(AllocatePrefetchLines > 0, "invalid value");
-  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
-    AllocatePrefetchLines = 1; // Conservative value
-
-  AllocatePrefetchDistance = allocate_prefetch_distance();
-  AllocatePrefetchStyle    = allocate_prefetch_style();
-
-  if( AllocatePrefetchStyle == 2 && is_intel() &&
-      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
-    AllocatePrefetchDistance = 320;
-  }
-  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
-
-#ifndef PRODUCT
-  if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per core: %u",
-                  logical_processors_per_package());
-    tty->print_cr("UseSSE=%d",UseSSE);
-    tty->print("Allocation: ");
-    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) {
-      tty->print_cr("no prefetching");
-    } else {
-      if (UseSSE == 0 && supports_3dnow()) {
-        tty->print("PREFETCHW");
-      } else if (UseSSE >= 1) {
-        if (AllocatePrefetchInstr == 0) {
-          tty->print("PREFETCHNTA");
-        } else if (AllocatePrefetchInstr == 1) {
-          tty->print("PREFETCHT0");
-        } else if (AllocatePrefetchInstr == 2) {
-          tty->print("PREFETCHT2");
-        } else if (AllocatePrefetchInstr == 3) {
-          tty->print("PREFETCHW");
-        }
-      }
-      if (AllocatePrefetchLines > 1) {
-        tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
-      } else {
-        tty->print_cr(" %d, one line", AllocatePrefetchDistance);
-      }
-    }
-  }
-#endif // !PRODUCT
-}
-
-void VM_Version::initialize() {
-  ResourceMark rm;
-  // Making this stub must be FIRST use of assembler
-
-  stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
-  if (stub_blob == NULL) {
-    vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
-  }
-  CodeBuffer c(stub_blob->instructions_begin(),
-               stub_blob->instructions_size());
-  VM_Version_StubGenerator g(&c);
-  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
-                                   g.generate_getPsrInfo());
-
-  get_processor_features();
-}
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,439 +0,0 @@
-/*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-class VM_Version: public Abstract_VM_Version {
-public:
-  // cpuid result register layouts.  These are all unions of a uint32_t
-  // (in case anyone wants access to the register as a whole) and a bitfield.
-
-  union StdCpuid1Eax {
-    uint32_t value;
-    struct {
-      uint32_t stepping   : 4,
-               model      : 4,
-               family     : 4,
-               proc_type  : 2,
-                          : 2,
-               ext_model  : 4,
-               ext_family : 8,
-                          : 4;
-    } bits;
-  };
-
-  union StdCpuid1Ebx { // example, unused
-    uint32_t value;
-    struct {
-      uint32_t brand_id         : 8,
-               clflush_size     : 8,
-               threads_per_cpu  : 8,
-               apic_id          : 8;
-    } bits;
-  };
-
-  union StdCpuid1Ecx {
-    uint32_t value;
-    struct {
-      uint32_t sse3     : 1,
-                        : 2,
-               monitor  : 1,
-                        : 1,
-               vmx      : 1,
-                        : 1,
-               est      : 1,
-                        : 1,
-               ssse3    : 1,
-               cid      : 1,
-                        : 2,
-               cmpxchg16: 1,
-                        : 4,
-               dca      : 1,
-               sse4_1   : 1,
-               sse4_2   : 1,
-                        : 11;
-    } bits;
-  };
-
-  union StdCpuid1Edx {
-    uint32_t value;
-    struct {
-      uint32_t          : 4,
-               tsc      : 1,
-                        : 3,
-               cmpxchg8 : 1,
-                        : 6,
-               cmov     : 1,
-                        : 7,
-               mmx      : 1,
-               fxsr     : 1,
-               sse      : 1,
-               sse2     : 1,
-                        : 1,
-               ht       : 1,
-                        : 3;
-    } bits;
-  };
-
-  union DcpCpuid4Eax {
-    uint32_t value;
-    struct {
-      uint32_t cache_type    : 5,
-                             : 21,
-               cores_per_cpu : 6;
-    } bits;
-  };
-
-  union DcpCpuid4Ebx {
-    uint32_t value;
-    struct {
-      uint32_t L1_line_size  : 12,
-               partitions    : 10,
-               associativity : 10;
-    } bits;
-  };
-
-  union ExtCpuid1Ecx {
-    uint32_t value;
-    struct {
-      uint32_t LahfSahf     : 1,
-               CmpLegacy    : 1,
-                            : 4,
-               abm          : 1,
-               sse4a        : 1,
-               misalignsse  : 1,
-               prefetchw    : 1,
-                            : 22;
-    } bits;
-  };
-
-  union ExtCpuid1Edx {
-    uint32_t value;
-    struct {
-      uint32_t           : 22,
-               mmx_amd   : 1,
-               mmx       : 1,
-               fxsr      : 1,
-                         : 4,
-               long_mode : 1,
-               tdnow2    : 1,
-               tdnow     : 1;
-    } bits;
-  };
-
-  union ExtCpuid5Ex {
-    uint32_t value;
-    struct {
-      uint32_t L1_line_size : 8,
-               L1_tag_lines : 8,
-               L1_assoc     : 8,
-               L1_size      : 8;
-    } bits;
-  };
-
-  union ExtCpuid8Ecx {
-    uint32_t value;
-    struct {
-      uint32_t cores_per_cpu : 8,
-                             : 24;
-    } bits;
-  };
-
-protected:
-   static int _cpu;
-   static int _model;
-   static int _stepping;
-   static int _cpuFeatures;     // features returned by the "cpuid" instruction
-                                // 0 if this instruction is not available
-   static const char* _features_str;
-
-   enum {
-     CPU_CX8  = (1 << 0), // next bits are from cpuid 1 (EDX)
-     CPU_CMOV = (1 << 1),
-     CPU_FXSR = (1 << 2),
-     CPU_HT   = (1 << 3),
-     CPU_MMX  = (1 << 4),
-     CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
-     CPU_SSE  = (1 << 6),
-     CPU_SSE2 = (1 << 7),
-     CPU_SSE3 = (1 << 8), // sse3  comes from cpuid 1 (ECX)
-     CPU_SSSE3= (1 << 9),
-     CPU_SSE4A= (1 <<10),
-     CPU_SSE4_1 = (1 << 11),
-     CPU_SSE4_2 = (1 << 12)
-   } cpuFeatureFlags;
-
-  // cpuid information block.  All info derived from executing cpuid with
-  // various function numbers is stored here.  Intel and AMD info is
-  // merged in this block: accessor methods disentangle it.
-  //
-  // The info block is laid out in subblocks of 4 dwords corresponding to
-  // rax, rbx, rcx and rdx, whether or not they contain anything useful.
-  struct CpuidInfo {
-    // cpuid function 0
-    uint32_t std_max_function;
-    uint32_t std_vendor_name_0;
-    uint32_t std_vendor_name_1;
-    uint32_t std_vendor_name_2;
-
-    // cpuid function 1
-    StdCpuid1Eax std_cpuid1_rax;
-    StdCpuid1Ebx std_cpuid1_rbx;
-    StdCpuid1Ecx std_cpuid1_rcx;
-    StdCpuid1Edx std_cpuid1_rdx;
-
-    // cpuid function 4 (deterministic cache parameters)
-    DcpCpuid4Eax dcp_cpuid4_rax;
-    DcpCpuid4Ebx dcp_cpuid4_rbx;
-    uint32_t     dcp_cpuid4_rcx; // unused currently
-    uint32_t     dcp_cpuid4_rdx; // unused currently
-
-    // cpuid function 0x80000000 // example, unused
-    uint32_t ext_max_function;
-    uint32_t ext_vendor_name_0;
-    uint32_t ext_vendor_name_1;
-    uint32_t ext_vendor_name_2;
-
-    // cpuid function 0x80000001
-    uint32_t     ext_cpuid1_rax; // reserved
-    uint32_t     ext_cpuid1_rbx; // reserved
-    ExtCpuid1Ecx ext_cpuid1_rcx;
-    ExtCpuid1Edx ext_cpuid1_rdx;
-
-    // cpuid functions 0x80000002 thru 0x80000004: example, unused
-    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
-    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
-    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
-
-    // cpuid function 0x80000005 //AMD L1, Intel reserved
-    uint32_t     ext_cpuid5_rax; // unused currently
-    uint32_t     ext_cpuid5_rbx; // reserved
-    ExtCpuid5Ex  ext_cpuid5_rcx; // L1 data cache info (AMD)
-    ExtCpuid5Ex  ext_cpuid5_rdx; // L1 instruction cache info (AMD)
-
-    // cpuid function 0x80000008
-    uint32_t     ext_cpuid8_rax; // unused currently
-    uint32_t     ext_cpuid8_rbx; // reserved
-    ExtCpuid8Ecx ext_cpuid8_rcx;
-    uint32_t     ext_cpuid8_rdx; // reserved
-  };
-
-  // The actual cpuid info block
-  static CpuidInfo _cpuid_info;
-
-  // Extractors and predicates
-  static uint32_t extended_cpu_family() {
-    uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
-    result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
-    return result;
-  }
-  static uint32_t extended_cpu_model() {
-    uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
-    result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
-    return result;
-  }
-  static uint32_t cpu_stepping() {
-    uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping;
-    return result;
-  }
-  static uint logical_processor_count() {
-    uint result = threads_per_core();
-    return result;
-  }
-  static uint32_t feature_flags() {
-    uint32_t result = 0;
-    if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0)
-      result |= CPU_CX8;
-    if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0)
-      result |= CPU_CMOV;
-    if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() &&
-        _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0)
-      result |= CPU_FXSR;
-    // HT flag is set for multi-core processors also.
-    if (threads_per_core() > 1)
-      result |= CPU_HT;
-    if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() &&
-        _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0)
-      result |= CPU_MMX;
-    if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0)
-      result |= CPU_3DNOW;
-    if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0)
-      result |= CPU_SSE;
-    if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0)
-      result |= CPU_SSE2;
-    if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0)
-      result |= CPU_SSE3;
-    if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0)
-      result |= CPU_SSSE3;
-    if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
-      result |= CPU_SSE4A;
-    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
-      result |= CPU_SSE4_1;
-    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
-      result |= CPU_SSE4_2;
-    return result;
-  }
-
-  static void get_processor_features();
-
-public:
-  // Offsets for cpuid asm stub
-  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
-  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); }
-  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); }
-  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); }
-  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); }
-  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); }
-
-  // Initialization
-  static void initialize();
-
-  // Asserts
-  static void assert_is_initialized() {
-    assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized");
-  }
-
-  //
-  // Processor family:
-  //       3   -  386
-  //       4   -  486
-  //       5   -  Pentium
-  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
-  //              Pentium M, Core Solo, Core Duo, Core2 Duo
-  //    family 6 model:   9,        13,       14,        15
-  //    0x0f   -  Pentium 4, Opteron
-  //
-  // Note: The cpu family should be used to select between
-  //       instruction sequences which are valid on all Intel
-  //       processors.  Use the feature test functions below to
-  //       determine whether a particular instruction is supported.
-  //
-  static int  cpu_family()        { return _cpu;}
-  static bool is_P6()             { return cpu_family() >= 6; }
-
-  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
-  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
-
-  static uint cores_per_cpu()  {
-    uint result = 1;
-    if (is_intel()) {
-      result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1);
-    } else if (is_amd()) {
-      result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1);
-    }
-    return result;
-  }
-
-  static uint threads_per_core()  {
-    uint result = 1;
-    if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) {
-      result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu /
-               cores_per_cpu();
-    }
-    return result;
-  }
-
-  static intx L1_data_cache_line_size()  {
-    intx result = 0;
-    if (is_intel()) {
-      result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1);
-    } else if (is_amd()) {
-      result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size;
-    }
-    if (result < 32) // not defined ?
-      result = 32;   // 32 bytes by default on x86
-    return result;
-  }
-
-  //
-  // Feature identification
-  //
-  static bool supports_cpuid()    { return _cpuFeatures  != 0; }
-  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
-  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
-  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
-  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
-  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
-  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
-  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
-  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
-  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
-  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
-  //
-  // AMD features
-  //
-  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
-  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; }
-  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; }
-  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
-
-  static bool supports_compare_and_exchange() { return true; }
-
-  static const char* cpu_features()           { return _features_str; }
-
-  static intx allocate_prefetch_distance() {
-    // This method should be called before allocate_prefetch_style().
-    //
-    // Hardware prefetching (distance/size in bytes):
-    // Pentium 3 -  64 /  32
-    // Pentium 4 - 256 / 128
-    // Athlon    -  64 /  32 ????
-    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
-    // Core      - 128 /  64
-    //
-    // Software prefetching (distance in bytes / instruction with best score):
-    // Pentium 3 - 128 / prefetchnta
-    // Pentium 4 - 512 / prefetchnta
-    // Athlon    - 128 / prefetchnta
-    // Opteron   - 256 / prefetchnta
-    // Core      - 256 / prefetchnta
-    // It will be used only when AllocatePrefetchStyle > 0
-
-    intx count = AllocatePrefetchDistance;
-    if (count < 0) {   // default ?
-      if (is_amd()) {  // AMD
-        if (supports_sse2())
-          count = 256; // Opteron
-        else
-          count = 128; // Athlon
-      } else {         // Intel
-        if (supports_sse2())
-          if (cpu_family() == 6) {
-            count = 256; // Pentium M, Core, Core2
-          } else {
-            count = 512; // Pentium 4
-          }
-        else
-          count = 128; // Pentium 3 (and all other old CPUs)
-      }
-    }
-    return count;
-  }
-  static intx allocate_prefetch_style() {
-    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
-    // Return 0 if AllocatePrefetchDistance was not defined or
-    // prefetch instruction is not supported.
-    return (AllocatePrefetchDistance > 0 &&
-            (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0;
-  }
-};
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,419 +0,0 @@
-/*
- * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-# include "incls/_precompiled.incl"
-# include "incls/_vm_version_x86_64.cpp.incl"
-
-int VM_Version::_cpu;
-int VM_Version::_model;
-int VM_Version::_stepping;
-int VM_Version::_cpuFeatures;
-const char*           VM_Version::_features_str = "";
-VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
-
-static BufferBlob* stub_blob;
-static const int stub_size = 300;
-
-extern "C" {
-  typedef void (*getPsrInfo_stub_t)(void*);
-}
-static getPsrInfo_stub_t getPsrInfo_stub = NULL;
-
-
-class VM_Version_StubGenerator: public StubCodeGenerator {
- public:
-
-  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
-
-  address generate_getPsrInfo() {
-
-    Label std_cpuid1, ext_cpuid1, ext_cpuid5, done;
-
-    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
-#   define __ _masm->
-
-    address start = __ pc();
-
-    //
-    // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
-    //
-    // rcx and rdx are first and second argument registers on windows
-
-    __ push(rbp);
-    __ mov(rbp, c_rarg0); // cpuid_info address
-    __ push(rbx);
-    __ push(rsi);
-
-    //
-    // we have a chip which supports the "cpuid" instruction
-    //
-    __ xorl(rax, rax);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
-    __ jccb(Assembler::belowEqual, std_cpuid1);
-
-    //
-    // cpuid(0x4) Deterministic cache params
-    //
-    __ movl(rax, 4);
-    __ xorl(rcx, rcx);   // L1 cache
-    __ cpuid();
-    __ push(rax);
-    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
-    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
-    __ pop(rax);
-    __ jccb(Assembler::equal, std_cpuid1);
-
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // Standard cpuid(0x1)
-    //
-    __ bind(std_cpuid1);
-    __ movl(rax, 1);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    __ movl(rax, 0x80000000);
-    __ cpuid();
-    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
-    __ jcc(Assembler::belowEqual, done);
-    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid1);
-    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid5);
-    //
-    // Extended cpuid(0x80000008)
-    //
-    __ movl(rax, 0x80000008);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // Extended cpuid(0x80000005)
-    //
-    __ bind(ext_cpuid5);
-    __ movl(rax, 0x80000005);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // Extended cpuid(0x80000001)
-    //
-    __ bind(ext_cpuid1);
-    __ movl(rax, 0x80000001);
-    __ cpuid();
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
-    __ movl(Address(rsi, 0), rax);
-    __ movl(Address(rsi, 4), rbx);
-    __ movl(Address(rsi, 8), rcx);
-    __ movl(Address(rsi,12), rdx);
-
-    //
-    // return
-    //
-    __ bind(done);
-    __ pop(rsi);
-    __ pop(rbx);
-    __ pop(rbp);
-    __ ret(0);
-
-#   undef __
-
-    return start;
-  };
-};
-
-
-void VM_Version::get_processor_features() {
-
-  _logical_processors_per_package = 1;
-  // Get raw processor info
-  getPsrInfo_stub(&_cpuid_info);
-  assert_is_initialized();
-  _cpu = extended_cpu_family();
-  _model = extended_cpu_model();
-  _stepping = cpu_stepping();
-  _cpuFeatures = feature_flags();
-  // Logical processors are only available on P4s and above,
-  // and only if hyperthreading is available.
-  _logical_processors_per_package = logical_processor_count();
-  _supports_cx8    = supports_cmpxchg8();
-  // OS should support SSE for x64 and hardware should support at least SSE2.
-  if (!VM_Version::supports_sse2()) {
-    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
-  }
-  if (UseSSE < 4) {
-    _cpuFeatures &= ~CPU_SSE4_1;
-    _cpuFeatures &= ~CPU_SSE4_2;
-  }
-  if (UseSSE < 3) {
-    _cpuFeatures &= ~CPU_SSE3;
-    _cpuFeatures &= ~CPU_SSSE3;
-    _cpuFeatures &= ~CPU_SSE4A;
-  }
-  if (UseSSE < 2)
-    _cpuFeatures &= ~CPU_SSE2;
-  if (UseSSE < 1)
-    _cpuFeatures &= ~CPU_SSE;
-
-  if (logical_processors_per_package() == 1) {
-    // HT processor could be installed on a system which doesn't support HT.
-    _cpuFeatures &= ~CPU_HT;
-  }
-
-  char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
-               cores_per_cpu(), threads_per_core(),
-               cpu_family(), _model, _stepping,
-               (supports_cmov() ? ", cmov" : ""),
-               (supports_cmpxchg8() ? ", cx8" : ""),
-               (supports_fxsr() ? ", fxsr" : ""),
-               (supports_mmx()  ? ", mmx"  : ""),
-               (supports_sse()  ? ", sse"  : ""),
-               (supports_sse2() ? ", sse2" : ""),
-               (supports_sse3() ? ", sse3" : ""),
-               (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4_1() ? ", sse4.1" : ""),
-               (supports_sse4_2() ? ", sse4.2" : ""),
-               (supports_mmx_ext() ? ", mmxext" : ""),
-               (supports_3dnow()   ? ", 3dnow"  : ""),
-               (supports_3dnow2()  ? ", 3dnowext" : ""),
-               (supports_sse4a()   ? ", sse4a": ""),
-               (supports_ht() ? ", ht": ""));
-  _features_str = strdup(buf);
-
-  // UseSSE is set to the smaller of what hardware supports and what
-  // the command line requires.  I.e., you cannot set UseSSE to 2 on
-  // older Pentiums which do not support it.
-  if( UseSSE > 4 ) UseSSE=4;
-  if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
-    UseSSE = MIN2((intx)3,UseSSE);
-  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
-    UseSSE = MIN2((intx)2,UseSSE);
-  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
-    UseSSE = MIN2((intx)1,UseSSE);
-  if( !supports_sse () ) // Drop to 0 if no SSE  support
-    UseSSE = 0;
-
-  // On new cpus instructions which update whole XMM register should be used
-  // to prevent partial register stall due to dependencies on high half.
-  //
-  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
-  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
-  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
-  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
-
-  if( is_amd() ) { // AMD cpus specific settings
-    if( FLAG_IS_DEFAULT(UseAddressNop) ) {
-      // Use it on all AMD cpus starting from Opteron (don't need
-      // a cpu check since only Opteron and new cpus support 64-bits mode).
-      UseAddressNop = true;
-    }
-    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
-      if( supports_sse4a() ) {
-        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
-      } else {
-        UseXmmLoadAndClearUpper = false;
-      }
-    }
-    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
-      if( supports_sse4a() ) {
-        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
-      } else {
-        UseXmmRegToRegMoveAll = false;
-      }
-    }
-    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
-      if( supports_sse4a() ) {
-        UseXmmI2F = true;
-      } else {
-        UseXmmI2F = false;
-      }
-    }
-    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
-      if( supports_sse4a() ) {
-        UseXmmI2D = true;
-      } else {
-        UseXmmI2D = false;
-      }
-    }
-  }
-
-  if( is_intel() ) { // Intel cpus specific settings
-    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
-      UseStoreImmI16 = false; // don't use it on Intel cpus
-    }
-    if( FLAG_IS_DEFAULT(UseAddressNop) ) {
-      // Use it on all Intel cpus starting from PentiumPro
-      // (don't need a cpu check since only new cpus support 64-bits mode).
-      UseAddressNop = true;
-    }
-    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
-      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
-    }
-    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
-      if( supports_sse3() ) {
-        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
-      } else {
-        UseXmmRegToRegMoveAll = false;
-      }
-    }
-    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
-#ifdef COMPILER2
-      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
-        // For new Intel cpus do the next optimization:
-        // don't align the beginning of a loop if there are enough instructions
-        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
-        // in current fetch line (OptoLoopAlignment) or the padding
-        // is big (> MaxLoopPad).
-        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
-        // generated NOP instructions. 11 is the largest size of one
-        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
-        MaxLoopPad = 11;
-      }
-#endif // COMPILER2
-      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
-        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
-      }
-      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
-        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
-          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
-        }
-      }
-    }
-  }
-
-  assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
-  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
-
-  // set valid Prefetch instruction
-  if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
-  if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
-  if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
-
-  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
-  if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
-  if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
-
-  // Allocation prefetch settings
-  intx cache_line_size = L1_data_cache_line_size();
-  if( cache_line_size > AllocatePrefetchStepSize )
-    AllocatePrefetchStepSize = cache_line_size;
-  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
-    AllocatePrefetchLines = 3; // Optimistic value
-  assert(AllocatePrefetchLines > 0, "invalid value");
-  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
-    AllocatePrefetchLines = 1; // Conservative value
-
-  AllocatePrefetchDistance = allocate_prefetch_distance();
-  AllocatePrefetchStyle    = allocate_prefetch_style();
-
-  if( AllocatePrefetchStyle == 2 && is_intel() &&
-      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
-    AllocatePrefetchDistance = 384;
-  }
-  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
-
-  // Prefetch settings
-  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
-  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
-  PrefetchFieldsAhead         = prefetch_fields_ahead();
-
-#ifndef PRODUCT
-  if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per core: %u",
-                  logical_processors_per_package());
-    tty->print_cr("UseSSE=%d",UseSSE);
-    tty->print("Allocation: ");
-    if (AllocatePrefetchStyle <= 0) {
-      tty->print_cr("no prefetching");
-    } else {
-      if (AllocatePrefetchInstr == 0) {
-        tty->print("PREFETCHNTA");
-      } else if (AllocatePrefetchInstr == 1) {
-        tty->print("PREFETCHT0");
-      } else if (AllocatePrefetchInstr == 2) {
-        tty->print("PREFETCHT2");
-      } else if (AllocatePrefetchInstr == 3) {
-        tty->print("PREFETCHW");
-      }
-      if (AllocatePrefetchLines > 1) {
-        tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
-      } else {
-        tty->print_cr(" %d, one line", AllocatePrefetchDistance);
-      }
-    }
-    if (PrefetchCopyIntervalInBytes > 0) {
-      tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
-    }
-    if (PrefetchScanIntervalInBytes > 0) {
-      tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
-    }
-    if (PrefetchFieldsAhead > 0) {
-      tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
-    }
-  }
-#endif // !PRODUCT
-}
-
-void VM_Version::initialize() {
-  ResourceMark rm;
-  // Making this stub must be FIRST use of assembler
-
-  stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
-  if (stub_blob == NULL) {
-    vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
-  }
-  CodeBuffer c(stub_blob->instructions_begin(),
-               stub_blob->instructions_size());
-  VM_Version_StubGenerator g(&c);
-  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
-                                   g.generate_getPsrInfo());
-
-  get_processor_features();
-}
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,449 +0,0 @@
-/*
- * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-class VM_Version : public Abstract_VM_Version {
-public:
-  // cpuid result register layouts.  These are all unions of a uint32_t
-  // (in case anyone wants access to the register as a whole) and a bitfield.
-
-  union StdCpuid1Eax {
-    uint32_t value;
-    struct {
-      uint32_t stepping   : 4,
-               model      : 4,
-               family     : 4,
-               proc_type  : 2,
-                          : 2,
-               ext_model  : 4,
-               ext_family : 8,
-                          : 4;
-    } bits;
-  };
-
-  union StdCpuid1Ebx { // example, unused
-    uint32_t value;
-    struct {
-      uint32_t brand_id         : 8,
-               clflush_size     : 8,
-               threads_per_cpu  : 8,
-               apic_id          : 8;
-    } bits;
-  };
-
-  union StdCpuid1Ecx {
-    uint32_t value;
-    struct {
-      uint32_t sse3     : 1,
-                        : 2,
-               monitor  : 1,
-                        : 1,
-               vmx      : 1,
-                        : 1,
-               est      : 1,
-                        : 1,
-               ssse3    : 1,
-               cid      : 1,
-                        : 2,
-               cmpxchg16: 1,
-                        : 4,
-               dca      : 1,
-               sse4_1   : 1,
-               sse4_2   : 1,
-                        : 11;
-    } bits;
-  };
-
-  union StdCpuid1Edx {
-    uint32_t value;
-    struct {
-      uint32_t          : 4,
-               tsc      : 1,
-                        : 3,
-               cmpxchg8 : 1,
-                        : 6,
-               cmov     : 1,
-                        : 7,
-               mmx      : 1,
-               fxsr     : 1,
-               sse      : 1,
-               sse2     : 1,
-                        : 1,
-               ht       : 1,
-                        : 3;
-    } bits;
-  };
-
-  union DcpCpuid4Eax {
-    uint32_t value;
-    struct {
-      uint32_t cache_type    : 5,
-                             : 21,
-               cores_per_cpu : 6;
-    } bits;
-  };
-
-  union DcpCpuid4Ebx {
-    uint32_t value;
-    struct {
-      uint32_t L1_line_size  : 12,
-               partitions    : 10,
-               associativity : 10;
-    } bits;
-  };
-
-  union ExtCpuid1Edx {
-    uint32_t value;
-    struct {
-      uint32_t           : 22,
-               mmx_amd   : 1,
-               mmx       : 1,
-               fxsr      : 1,
-                         : 4,
-               long_mode : 1,
-               tdnow2    : 1,
-               tdnow     : 1;
-    } bits;
-  };
-
-  union ExtCpuid1Ecx {
-    uint32_t value;
-    struct {
-      uint32_t LahfSahf     : 1,
-               CmpLegacy    : 1,
-                            : 4,
-               abm          : 1,
-               sse4a        : 1,
-               misalignsse  : 1,
-               prefetchw    : 1,
-                            : 22;
-    } bits;
-  };
-
-  union ExtCpuid5Ex {
-    uint32_t value;
-    struct {
-      uint32_t L1_line_size : 8,
-               L1_tag_lines : 8,
-               L1_assoc     : 8,
-               L1_size      : 8;
-    } bits;
-  };
-
-  union ExtCpuid8Ecx {
-    uint32_t value;
-    struct {
-      uint32_t cores_per_cpu : 8,
-                             : 24;
-    } bits;
-  };
-
-protected:
-   static int _cpu;
-   static int _model;
-   static int _stepping;
-   static int _cpuFeatures;     // features returned by the "cpuid" instruction
-                                // 0 if this instruction is not available
-   static const char* _features_str;
-
-   enum {
-     CPU_CX8  = (1 << 0), // next bits are from cpuid 1 (EDX)
-     CPU_CMOV = (1 << 1),
-     CPU_FXSR = (1 << 2),
-     CPU_HT   = (1 << 3),
-     CPU_MMX  = (1 << 4),
-     CPU_3DNOW= (1 << 5),
-     CPU_SSE  = (1 << 6),
-     CPU_SSE2 = (1 << 7),
-     CPU_SSE3 = (1 << 8),
-     CPU_SSSE3= (1 << 9),
-     CPU_SSE4A= (1 <<10),
-     CPU_SSE4_1 = (1 << 11),
-     CPU_SSE4_2 = (1 << 12)
-   } cpuFeatureFlags;
-
-  // cpuid information block.  All info derived from executing cpuid with
-  // various function numbers is stored here.  Intel and AMD info is
-  // merged in this block: accessor methods disentangle it.
-  //
-  // The info block is laid out in subblocks of 4 dwords corresponding to
-  // eax, ebx, ecx and edx, whether or not they contain anything useful.
-  struct CpuidInfo {
-    // cpuid function 0
-    uint32_t std_max_function;
-    uint32_t std_vendor_name_0;
-    uint32_t std_vendor_name_1;
-    uint32_t std_vendor_name_2;
-
-    // cpuid function 1
-    StdCpuid1Eax std_cpuid1_eax;
-    StdCpuid1Ebx std_cpuid1_ebx;
-    StdCpuid1Ecx std_cpuid1_ecx;
-    StdCpuid1Edx std_cpuid1_edx;
-
-    // cpuid function 4 (deterministic cache parameters)
-    DcpCpuid4Eax dcp_cpuid4_eax;
-    DcpCpuid4Ebx dcp_cpuid4_ebx;
-    uint32_t     dcp_cpuid4_ecx; // unused currently
-    uint32_t     dcp_cpuid4_edx; // unused currently
-
-    // cpuid function 0x80000000 // example, unused
-    uint32_t ext_max_function;
-    uint32_t ext_vendor_name_0;
-    uint32_t ext_vendor_name_1;
-    uint32_t ext_vendor_name_2;
-
-    // cpuid function 0x80000001
-    uint32_t     ext_cpuid1_eax; // reserved
-    uint32_t     ext_cpuid1_ebx; // reserved
-    ExtCpuid1Ecx ext_cpuid1_ecx;
-    ExtCpuid1Edx ext_cpuid1_edx;
-
-    // cpuid functions 0x80000002 thru 0x80000004: example, unused
-    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
-    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
-    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
-
-    // cpuid function 0x80000005 //AMD L1, Intel reserved
-    uint32_t     ext_cpuid5_eax; // unused currently
-    uint32_t     ext_cpuid5_ebx; // reserved
-    ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
-    ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
-
-    // cpuid function 0x80000008
-    uint32_t     ext_cpuid8_eax; // unused currently
-    uint32_t     ext_cpuid8_ebx; // reserved
-    ExtCpuid8Ecx ext_cpuid8_ecx;
-    uint32_t     ext_cpuid8_edx; // reserved
-  };
-
-  // The actual cpuid info block
-  static CpuidInfo _cpuid_info;
-
-  // Extractors and predicates
-  static uint32_t extended_cpu_family() {
-    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
-    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
-    return result;
-  }
-  static uint32_t extended_cpu_model() {
-    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
-    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
-    return result;
-  }
-  static uint32_t cpu_stepping() {
-    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
-    return result;
-  }
-  static uint logical_processor_count() {
-    uint result = threads_per_core();
-    return result;
-  }
-  static uint32_t feature_flags() {
-    uint32_t result = 0;
-    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
-      result |= CPU_CX8;
-    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
-      result |= CPU_CMOV;
-    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() &&
-        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)
-      result |= CPU_FXSR;
-    // HT flag is set for multi-core processors also.
-    if (threads_per_core() > 1)
-      result |= CPU_HT;
-    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
-        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
-      result |= CPU_MMX;
-    if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
-      result |= CPU_3DNOW;
-    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
-      result |= CPU_SSE;
-    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
-      result |= CPU_SSE2;
-    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
-      result |= CPU_SSE3;
-    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
-      result |= CPU_SSSE3;
-    if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
-      result |= CPU_SSE4A;
-    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
-      result |= CPU_SSE4_1;
-    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
-      result |= CPU_SSE4_2;
-    return result;
-  }
-
-  static void get_processor_features();
-
-public:
-  // Offsets for cpuid asm stub
-  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
-  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
-  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
-  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
-  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
-  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
-
-  // Initialization
-  static void initialize();
-
-  // Asserts
-  static void assert_is_initialized() {
-    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
-  }
-
-  //
-  // Processor family:
-  //       3   -  386
-  //       4   -  486
-  //       5   -  Pentium
-  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
-  //              Pentium M, Core Solo, Core Duo, Core2 Duo
-  //    family 6 model:   9,        13,       14,        15
-  //    0x0f   -  Pentium 4, Opteron
-  //
-  // Note: The cpu family should be used to select between
-  //       instruction sequences which are valid on all Intel
-  //       processors.  Use the feature test functions below to
-  //       determine whether a particular instruction is supported.
-  //
-  static int  cpu_family()        { return _cpu;}
-  static bool is_P6()             { return cpu_family() >= 6; }
-
-  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
-  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
-
-  static uint cores_per_cpu()  {
-    uint result = 1;
-    if (is_intel()) {
-      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
-    } else if (is_amd()) {
-      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
-    }
-    return result;
-  }
-
-  static uint threads_per_core()  {
-    uint result = 1;
-    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
-      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
-               cores_per_cpu();
-    }
-    return result;
-  }
-
-  static intx L1_data_cache_line_size()  {
-    intx result = 0;
-    if (is_intel()) {
-      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
-    } else if (is_amd()) {
-      result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
-    }
-    if (result < 32) // not defined ?
-      result = 32;   // 32 bytes by default for other x64
-    return result;
-  }
-
-  //
-  // Feature identification
-  //
-  static bool supports_cpuid()    { return _cpuFeatures  != 0; }
-  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
-  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
-  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
-  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
-  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
-  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
-  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
-  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
-  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
-  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
-  //
-  // AMD features
-  //
-  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
-  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
-  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
-  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
-
-  static bool supports_compare_and_exchange() { return true; }
-
-  static const char* cpu_features()           { return _features_str; }
-
-  static intx allocate_prefetch_distance() {
-    // This method should be called before allocate_prefetch_style().
-    //
-    // Hardware prefetching (distance/size in bytes):
-    // Pentium 4 - 256 / 128
-    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
-    // Core      - 128 /  64
-    //
-    // Software prefetching (distance in bytes / instruction with best score):
-    // Pentium 4 - 512 / prefetchnta
-    // Opteron   - 256 / prefetchnta
-    // Core      - 256 / prefetchnta
-    // It will be used only when AllocatePrefetchStyle > 0
-
-    intx count = AllocatePrefetchDistance;
-    if (count < 0) {  // default ?
-      if (is_amd()) { // AMD
-        count = 256;  // Opteron
-      } else {        // Intel
-        if (cpu_family() == 6) {
-          count = 256;// Pentium M, Core, Core2
-        } else {
-          count = 512;// Pentium 4
-        }
-      }
-    }
-    return count;
-  }
-  static intx allocate_prefetch_style() {
-    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
-    // Return 0 if AllocatePrefetchDistance was not defined.
-    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
-  }
-
-  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
-  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
-  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
-  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
-
-  // gc copy/scan is disabled if prefetchw isn't supported, because
-  // Prefetch::write emits an inlined prefetchw on Linux.
-  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
-  // The used prefetcht0 instruction works for both amd64 and em64t.
-  static intx prefetch_copy_interval_in_bytes() {
-    intx interval = PrefetchCopyIntervalInBytes;
-    return interval >= 0 ? interval : 576;
-  }
-  static intx prefetch_scan_interval_in_bytes() {
-    intx interval = PrefetchScanIntervalInBytes;
-    return interval >= 0 ? interval : 576;
-  }
-  static intx prefetch_fields_ahead() {
-    intx count = PrefetchFieldsAhead;
-    return count >= 0 ? count : 1;
-  }
-};
--- a/hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -34,10 +34,16 @@
 extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
 #endif
 
-// used by compiler only; may use only caller saved registers rax, rbx, rcx.
-// rdx holds first int arg, rsi, rdi, rbp are callee-save & must be preserved.
-// Leave receiver in rcx; required behavior when +OptoArgsInRegisters
-// is modifed to put first oop in rcx.
+// These stubs are used by the compiler only.
+// Argument registers, which must be preserved:
+//   rcx - receiver (always first argument)
+//   rdx - second argument (if any)
+// Other registers that might be usable:
+//   rax - inline cache register (is interface for itable stub)
+//   rbx - method (used when calling out to interpreter)
+// Available now, but may become callee-save at some point:
+//   rsi, rdi
+// Note that rax and rdx are also used for return values.
 //
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
   const int i486_code_length = VtableStub::pd_code_size_limit(true);
@@ -94,16 +100,25 @@
   __ jmp( Address(method, methodOopDesc::from_compiled_offset()));
 
   masm->flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  vtable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
   s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
-VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   // Note well: pd_code_size_limit is the absolute minimum we can get away with.  If you
   //            add code here, bump the code stub size returned by pd_code_size_limit!
   const int i486_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(i486_code_length) VtableStub(false, vtable_index);
+  VtableStub* s = new(i486_code_length) VtableStub(false, itable_index);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), i486_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -123,50 +138,19 @@
 
   // get receiver klass (also an implicit null-check)
   address npe_addr = __ pc();
-  __ movptr(rbx, Address(rcx, oopDesc::klass_offset_in_bytes()));
-
-  __ mov(rsi, rbx);   // Save klass in free register
-  // Most registers are in use, so save a few
-  __ push(rdx);
-  // compute itable entry offset (in words)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
-  __ movl(rdx, Address(rbx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable
-  __ lea(rbx, Address(rbx, rdx, Address::times_ptr, base));
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rbx, BytesPerLong);
-  }
-
-  Label hit, next, entry, throw_icce;
-
-  __ jmpb(entry);
+  __ movptr(rsi, Address(rcx, oopDesc::klass_offset_in_bytes()));
 
-  __ bind(next);
-  __ addptr(rbx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // If the entry is NULL then we've reached the end of the table
-  // without finding the expected interface, so throw an exception
-  __ movptr(rdx, Address(rbx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(rdx, rdx);
-  __ jcc(Assembler::zero, throw_icce);
-  __ cmpptr(rax, rdx);
-  __ jcc(Assembler::notEqual, next);
-
-  // We found a hit, move offset into rbx,
-  __ movl(rdx, Address(rbx, itableOffsetEntry::offset_offset_in_bytes()));
-
-  // Compute itableMethodEntry.
-  const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + itableMethodEntry::method_offset_in_bytes();
+  // Most registers are in use; we'll use rax, rbx, rsi, rdi
+  // (If we need to make rsi, rdi callee-save, do a push/pop here.)
+  const Register method = rbx;
+  Label throw_icce;
 
   // Get methodOop and entrypoint for compiler
-  const Register method = rbx;
-  __ movptr(method, Address(rsi, rdx, Address::times_1, method_offset));
-
-  // Restore saved register, before possible trap.
-  __ pop(rdx);
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             rsi, rax, itable_index,
+                             // outputs: method, scan temp. reg
+                             method, rdi,
+                             throw_icce);
 
   // method (rbx): methodOop
   // rcx: receiver
@@ -187,12 +171,15 @@
   __ jmp(Address(method, methodOopDesc::from_compiled_offset()));
 
   __ bind(throw_icce);
-  // Restore saved register
-  __ pop(rdx);
   __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
-
   masm->flush();
 
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  itable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
 
   s->set_exception_points(npe_addr, ame_addr);
@@ -207,7 +194,7 @@
     return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0);
   } else {
     // Itable stub size
-    return (DebugVtables ? 144 : 64) + (CountCompiledCalls ? 6 : 0);
+    return (DebugVtables ? 256 : 66) + (CountCompiledCalls ? 6 : 0);
   }
 }
 
--- a/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -98,17 +98,26 @@
   __ jmp( Address(rbx, methodOopDesc::from_compiled_offset()));
 
   __ flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  vtable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
   s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
-VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   // Note well: pd_code_size_limit is the absolute minimum we can get
   // away with.  If you add code here, bump the code stub size
   // returned by pd_code_size_limit!
   const int amd64_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(amd64_code_length) VtableStub(false, vtable_index);
+  VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), amd64_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -131,68 +140,28 @@
   // get receiver klass (also an implicit null-check)
   address npe_addr = __ pc();
 
-  __ load_klass(rbx, j_rarg0);
+  // Most registers are in use; we'll use rax, rbx, r10, r11
+  // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them)
+  __ load_klass(r10, j_rarg0);
 
   // If we take a trap while this arg is on the stack we will not
   // be able to walk the stack properly. This is not an issue except
   // when there are mistakes in this assembly code that could generate
   // a spurious fault. Ask me how I know...
 
-  __ push(j_rarg1);     // Most registers are in use, so save one
-
-  // compute itable entry offset (in words)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == 8,
-         "adjust the scaling in the code below");
-  // Get length of vtable
-  __ movl(j_rarg1,
-          Address(rbx, instanceKlass::vtable_length_offset() * wordSize));
-  __ lea(rbx, Address(rbx, j_rarg1, Address::times_8, base));
-
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rbx, BytesPerLong);
-  }
-  Label hit, next, entry, throw_icce;
-
-  __ jmpb(entry);
-
-  __ bind(next);
-  __ addptr(rbx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // If the entry is NULL then we've reached the end of the table
-  // without finding the expected interface, so throw an exception
-  __ movptr(j_rarg1, Address(rbx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(j_rarg1, j_rarg1);
-  __ jcc(Assembler::zero, throw_icce);
-  __ cmpptr(rax, j_rarg1);
-  __ jccb(Assembler::notEqual, next);
-
-  // We found a hit, move offset into j_rarg1
-  __ movl(j_rarg1, Address(rbx, itableOffsetEntry::offset_offset_in_bytes()));
-
-  // Compute itableMethodEntry
-  const int method_offset =
-    (itableMethodEntry::size() * wordSize * vtable_index) +
-    itableMethodEntry::method_offset_in_bytes();
+  const Register method = rbx;
+  Label throw_icce;
 
   // Get methodOop and entrypoint for compiler
-
-  // Get klass pointer again
-  __ load_klass(rax, j_rarg0);
-
-  const Register method = rbx;
-  __ movptr(method, Address(rax, j_rarg1, Address::times_1, method_offset));
-
-  // Restore saved register, before possible trap.
-  __ pop(j_rarg1);
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             r10, rax, itable_index,
+                             // outputs: method, scan temp. reg
+                             method, r11,
+                             throw_icce);
 
   // method (rbx): methodOop
   // j_rarg0: receiver
 
-
 #ifdef ASSERT
   if (DebugVtables) {
     Label L2;
@@ -211,12 +180,16 @@
   __ jmp(Address(method, methodOopDesc::from_compiled_offset()));
 
   __ bind(throw_icce);
-  // Restore saved register
-  __ pop(j_rarg1);
   __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
 
   __ flush();
 
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  itable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
 
   s->set_exception_points(npe_addr, ame_addr);
@@ -230,7 +203,7 @@
            (UseCompressedOops ? 16 : 0);  // 1 leaq can be 3 bytes + 1 long
   } else {
     // Itable stub size
-    return (DebugVtables ? 636 : 72) + (CountCompiledCalls ? 13 : 0) +
+    return (DebugVtables ? 512 : 72) + (CountCompiledCalls ? 13 : 0) +
            (UseCompressedOops ? 32 : 0);  // 2 leaqs
   }
 }
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Thu Mar 12 18:16:36 2009 -0700
@@ -130,7 +130,7 @@
 // allocation.  Highest priority is first.  A useful heuristic is to
 // give registers a low priority when they are required by machine
 // instructions, like EAX and EDX.  Registers which are used as
-// pairs must fall on an even boundry (witness the FPR#L's in this list).
+// pairs must fall on an even boundary (witness the FPR#L's in this list).
 // For the Intel integer registers, the equivalent Long pairs are
 // EDX:EAX, EBX:ECX, and EDI:EBP.
 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
@@ -3126,14 +3126,12 @@
 
   enc_class movq_ld(regXD dst, memory mem) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(as_XMMRegister($dst$$reg), madr);
+    __ movq($dst$$XMMRegister, $mem$$Address);
   %}
 
   enc_class movq_st(memory mem, regXD src) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(madr, as_XMMRegister($src$$reg));
+    __ movq($mem$$Address, $src$$XMMRegister);
   %}
 
   enc_class pshufd_8x8(regX dst, regX src) %{
@@ -3751,8 +3749,8 @@
     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
 
     // Load first characters
-    masm.load_unsigned_word(rcx, Address(rbx, 0));
-    masm.load_unsigned_word(rdi, Address(rax, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, 0));
+    masm.load_unsigned_short(rdi, Address(rax, 0));
 
     // Compare first characters
     masm.subl(rcx, rdi);
@@ -3782,8 +3780,8 @@
 
     // Compare the rest of the characters
     masm.bind(WHILE_HEAD_LABEL);
-    masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
-    masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
     masm.subl(rcx, rdi);
     masm.jcc(Assembler::notZero, POP_LABEL);
     masm.incrementl(rsi);
@@ -3840,8 +3838,8 @@
     masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
 
     // Compare 2-byte "tail" at end of arrays
-    masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
-    masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
     masm.cmpl(tmp1Reg, tmp2Reg);
     masm.jcc(Assembler::notEqual, FALSE_LABEL);
     masm.testl(resultReg, resultReg);
@@ -5857,7 +5855,7 @@
 
 //----------OPERAND CLASSES----------------------------------------------------
 // Operand Classes are groups of operands that are used as to simplify
-// instruction definitions by not requiring the AD writer to specify seperate
+// instruction definitions by not requiring the AD writer to specify separate
 // instructions for every form of operand when the instruction accepts
 // multiple operand types with the same basic encoding and format.  The classic
 // case of this is memory operands.
@@ -6396,21 +6394,94 @@
   match(Set dst (LoadB mem));
 
   ins_cost(125);
-  format %{ "MOVSX8 $dst,$mem" %}
-  opcode(0xBE, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
-%}
-
-// Load Byte (8bit UNsigned)
-instruct loadUB(xRegI dst, memory mem, immI_255 bytemask) %{
-  match(Set dst (AndI (LoadB mem) bytemask));
+  format %{ "MOVSX8 $dst,$mem\t# byte" %}
+
+  ins_encode %{
+    __ movsbl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Byte (8bit signed) into Long Register
+instruct loadB2L(eRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadB mem)));
+
+  ins_cost(375);
+  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
+            "MOV    $dst.hi,$dst.lo\n\t"
+            "SAR    $dst.hi,7" %}
+
+  ins_encode %{
+    __ movsbl($dst$$Register, $mem$$Address);
+    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
+    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned)
+instruct loadUB(xRegI dst, memory mem) %{
+  match(Set dst (LoadUB mem));
 
   ins_cost(125);
-  format %{ "MOVZX8 $dst,$mem" %}
-  opcode(0xB6, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
+  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
+
+  ins_encode %{
+    __ movzbl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned) into Long Register
+instruct loadUB2L(eRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUB mem)));
+
+  ins_cost(250);
+  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+
+  ins_encode %{
+    __ movzbl($dst$$Register, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Short (16bit signed)
+instruct loadS(eRegI dst, memory mem) %{
+  match(Set dst (LoadS mem));
+
+  ins_cost(125);
+  format %{ "MOVSX  $dst,$mem\t# short" %}
+
+  ins_encode %{
+    __ movswl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Short (16bit signed) into Long Register
+instruct loadS2L(eRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadS mem)));
+
+  ins_cost(375);
+  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
+            "MOV    $dst.hi,$dst.lo\n\t"
+            "SAR    $dst.hi,15" %}
+
+  ins_encode %{
+    __ movswl($dst$$Register, $mem$$Address);
+    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
+    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
+  %}
+
+  ins_pipe(ialu_reg_mem);
 %}
 
 // Load Unsigned Short/Char (16bit unsigned)
@@ -6418,10 +6489,30 @@
   match(Set dst (LoadUS mem));
 
   ins_cost(125);
-  format %{ "MOVZX  $dst,$mem" %}
-  opcode(0xB7, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
+  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
+
+  ins_encode %{
+    __ movzwl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
+instruct loadUS2L(eRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUS mem)));
+
+  ins_cost(250);
+  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+
+  ins_encode %{
+    __ movzwl($dst$$Register, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
+  %}
+
+  ins_pipe(ialu_reg_mem);
 %}
 
 // Load Integer
@@ -6429,10 +6520,47 @@
   match(Set dst (LoadI mem));
 
   ins_cost(125);
-  format %{ "MOV    $dst,$mem" %}
-  opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
+  format %{ "MOV    $dst,$mem\t# int" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer into Long Register
+instruct loadI2L(eRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadI mem)));
+
+  ins_cost(375);
+  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
+            "MOV    $dst.hi,$dst.lo\n\t"
+            "SAR    $dst.hi,31" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
+    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Integer into Long Register
+instruct loadUI2L(eRegL dst, memory mem) %{
+  match(Set dst (LoadUI2L mem));
+
+  ins_cost(250);
+  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
+  %}
+
+  ins_pipe(ialu_reg_mem);
 %}
 
 // Load Long.  Cannot clobber address while loading, so restrict address
@@ -6442,11 +6570,17 @@
   match(Set dst (LoadL mem));
 
   ins_cost(250);
-  format %{ "MOV    $dst.lo,$mem\n\t"
+  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
             "MOV    $dst.hi,$mem+4" %}
-  opcode(0x8B, 0x8B);
-  ins_encode( OpcP, RegMem(dst,mem), OpcS, RegMem_Hi(dst,mem));
-  ins_pipe( ialu_reg_long_mem );
+
+  ins_encode %{
+    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
+    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
+    __ movl($dst$$Register, Amemlo);
+    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
+  %}
+
+  ins_pipe(ialu_reg_long_mem);
 %}
 
 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
@@ -6521,17 +6655,6 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-// Load Short (16bit signed)
-instruct loadS(eRegI dst, memory mem) %{
-  match(Set dst (LoadS mem));
-
-  ins_cost(125);
-  format %{ "MOVSX  $dst,$mem" %}
-  opcode(0xBF, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
-%}
-
 // Load Double
 instruct loadD(regD dst, memory mem) %{
   predicate(UseSSE<=1);
@@ -7957,7 +8080,7 @@
     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
     if( os::is_MP() )
       __ lock();
-    __ cmpxchg8(Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp));
+    __ cmpxchg8($mem$$Address);
     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
   %}
   ins_pipe( pipe_cmpxchg );
@@ -11467,6 +11590,7 @@
 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
   match(Set dst (ConvI2L src));
   effect(KILL cr);
+  ins_cost(375);
   format %{ "MOV    $dst.lo,$src\n\t"
             "MOV    $dst.hi,$src\n\t"
             "SAR    $dst.hi,31" %}
@@ -11478,6 +11602,7 @@
 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
   match(Set dst (AndL (ConvI2L src) mask) );
   effect( KILL flags );
+  ins_cost(250);
   format %{ "MOV    $dst.lo,$src\n\t"
             "XOR    $dst.hi,$dst.hi" %}
   opcode(0x33); // XOR
@@ -11489,6 +11614,7 @@
 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
   match(Set dst (AndL src mask) );
   effect( KILL flags );
+  ins_cost(250);
   format %{ "MOV    $dst.lo,$src.lo\n\t"
             "XOR    $dst.hi,$dst.hi\n\t" %}
   opcode(0x33); // XOR
@@ -13220,7 +13346,7 @@
 // These must follow all instruction definitions as they use the names
 // defined in the instructions definitions.
 //
-// peepmatch ( root_instr_name [preceeding_instruction]* );
+// peepmatch ( root_instr_name [preceding_instruction]* );
 //
 // peepconstraint %{
 // (instruction_number.operand_name relational_op instruction_number.operand_name
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Thu Mar 12 18:16:36 2009 -0700
@@ -3462,14 +3462,12 @@
 
   enc_class movq_ld(regD dst, memory mem) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(as_XMMRegister($dst$$reg), madr);
+    __ movq($dst$$XMMRegister, $mem$$Address);
   %}
 
   enc_class movq_st(memory mem, regD src) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(madr, as_XMMRegister($src$$reg));
+    __ movq($mem$$Address, $src$$XMMRegister);
   %}
 
   enc_class pshufd_8x8(regF dst, regF src) %{
@@ -3765,8 +3763,8 @@
     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
 
     // Load first characters
-    masm.load_unsigned_word(rcx, Address(rbx, 0));
-    masm.load_unsigned_word(rdi, Address(rax, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, 0));
+    masm.load_unsigned_short(rdi, Address(rax, 0));
 
     // Compare first characters
     masm.subl(rcx, rdi);
@@ -3796,8 +3794,8 @@
 
     // Compare the rest of the characters
     masm.bind(WHILE_HEAD_LABEL);
-    masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
-    masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
     masm.subl(rcx, rdi);
     masm.jcc(Assembler::notZero, POP_LABEL);
     masm.increment(rsi);
@@ -3854,8 +3852,8 @@
     masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
 
     // Compare 2-byte "tail" at end of arrays
-    masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
-    masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
     masm.cmpl(tmp1Reg, tmp2Reg);
     masm.jcc(Assembler::notEqual, FALSE_LABEL);
     masm.testl(resultReg, resultReg);
@@ -5483,7 +5481,7 @@
 
 //----------OPERAND CLASSES----------------------------------------------------
 // Operand Classes are groups of operands that are used as to simplify
-// instruction definitions by not requiring the AD writer to specify seperate
+// instruction definitions by not requiring the AD writer to specify separate
 // instructions for every form of operand when the instruction accepts
 // multiple operand types with the same basic encoding and format.  The classic
 // case of this is memory operands.
@@ -6031,70 +6029,88 @@
 
   ins_cost(125);
   format %{ "movsbl  $dst, $mem\t# byte" %}
-  opcode(0x0F, 0xBE);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movsbl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Byte (8 bit signed) into long
-// instruct loadB2L(rRegL dst, memory mem)
-// %{
-//   match(Set dst (ConvI2L (LoadB mem)));
-
-//   ins_cost(125);
-//   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
-//   opcode(0x0F, 0xBE);
-//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
-
-// Load Byte (8 bit UNsigned)
-instruct loadUB(rRegI dst, memory mem, immI_255 bytemask)
-%{
-  match(Set dst (AndI (LoadB mem) bytemask));
+// Load Byte (8 bit signed) into Long Register
+instruct loadB2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadB mem)));
+
+  ins_cost(125);
+  format %{ "movsbq  $dst, $mem\t# byte -> long" %}
+
+  ins_encode %{
+    __ movsbq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned)
+instruct loadUB(rRegI dst, memory mem)
+%{
+  match(Set dst (LoadUB mem));
 
   ins_cost(125);
   format %{ "movzbl  $dst, $mem\t# ubyte" %}
-  opcode(0x0F, 0xB6);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movzbl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Byte (8 bit UNsigned) into long
-// instruct loadUB2L(rRegL dst, memory mem, immI_255 bytemask)
-// %{
-//   match(Set dst (ConvI2L (AndI (LoadB mem) bytemask)));
-
-//   ins_cost(125);
-//   format %{ "movzbl  $dst, $mem\t# ubyte -> long" %}
-//   opcode(0x0F, 0xB6);
-//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
+// Load Unsigned Byte (8 bit UNsigned) into Long Register
+instruct loadUB2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUB mem)));
+
+  ins_cost(125);
+  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
+
+  ins_encode %{
+    __ movzbq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
 
 // Load Short (16 bit signed)
 instruct loadS(rRegI dst, memory mem)
 %{
   match(Set dst (LoadS mem));
 
-  ins_cost(125); // XXX
+  ins_cost(125);
   format %{ "movswl $dst, $mem\t# short" %}
-  opcode(0x0F, 0xBF);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movswl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Short (16 bit signed) into long
-// instruct loadS2L(rRegL dst, memory mem)
-// %{
-//   match(Set dst (ConvI2L (LoadS mem)));
-
-//   ins_cost(125); // XXX
-//   format %{ "movswq $dst, $mem\t# short -> long" %}
-//   opcode(0x0F, 0xBF);
-//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
+// Load Short (16 bit signed) into Long Register
+instruct loadS2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadS mem)));
+
+  ins_cost(125);
+  format %{ "movswq $dst, $mem\t# short -> long" %}
+
+  ins_encode %{
+    __ movswq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
 
 // Load Unsigned Short/Char (16 bit UNsigned)
 instruct loadUS(rRegI dst, memory mem)
@@ -6103,32 +6119,71 @@
 
   ins_cost(125);
   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
-  opcode(0x0F, 0xB7);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movzwl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Unsigned Short/Char (16 bit UNsigned) into long
-// instruct loadUS2L(rRegL dst, memory mem)
-// %{
-//   match(Set dst (ConvI2L (LoadUS mem)));
-
-//   ins_cost(125);
-//   format %{ "movzwl  $dst, $mem\t# ushort/char -> long" %}
-//   opcode(0x0F, 0xB7);
-//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
+// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
+instruct loadUS2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUS mem)));
+
+  ins_cost(125);
+  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
+
+  ins_encode %{
+    __ movzwq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
 
 // Load Integer
 instruct loadI(rRegI dst, memory mem)
 %{
   match(Set dst (LoadI mem));
 
-  ins_cost(125); // XXX
+  ins_cost(125);
   format %{ "movl    $dst, $mem\t# int" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer into Long Register
+instruct loadI2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadI mem)));
+
+  ins_cost(125);
+  format %{ "movslq  $dst, $mem\t# int -> long" %}
+
+  ins_encode %{
+    __ movslq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Integer into Long Register
+instruct loadUI2L(rRegL dst, memory mem)
+%{
+  match(Set dst (LoadUI2L mem));
+
+  ins_cost(125);
+  format %{ "movl    $dst, $mem\t# uint -> long" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -6137,10 +6192,13 @@
 %{
   match(Set dst (LoadL mem));
 
-  ins_cost(125); // XXX
+  ins_cost(125);
   format %{ "movq    $dst, $mem\t# long" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movq($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem); // XXX
 %}
 
@@ -8363,7 +8421,7 @@
 //----------- DivL-By-Constant-Expansions--------------------------------------
 // DivI cases are handled by the compiler
 
-// Magic constant, reciprical of 10
+// Magic constant, reciprocal of 10
 instruct loadConL_0x6666666666666667(rRegL dst)
 %{
   effect(DEF dst);
@@ -10804,16 +10862,6 @@
 //   ins_pipe(ialu_reg_reg);
 // %}
 
-instruct convI2L_reg_mem(rRegL dst, memory src)
-%{
-  match(Set dst (ConvI2L (LoadI src)));
-
-  format %{ "movslq  $dst, $src\t# i2l" %}
-  opcode(0x63); // needs REX.W
-  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst,src));
-  ins_pipe(ialu_reg_mem);
-%}
-
 // Zero-extend convert int to long
 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
 %{
@@ -12082,7 +12130,7 @@
 // These must follow all instruction definitions as they use the names
 // defined in the instructions definitions.
 //
-// peepmatch ( root_instr_name [precerding_instruction]* );
+// peepmatch ( root_instr_name [preceding_instruction]* );
 //
 // peepconstraint %{
 // (instruction_number.operand_name relational_op instruction_number.operand_name
--- a/hotspot/src/os/linux/launcher/java.c	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os/linux/launcher/java.c	Thu Mar 12 18:16:36 2009 -0700
@@ -419,7 +419,7 @@
             goto leave;
         }
         mainClass = LoadClass(env, classname);
-        if(mainClass == NULL) { /* exception occured */
+        if(mainClass == NULL) { /* exception occurred */
             ReportExceptionDescription(env);
             message = "Could not find the main class.  Program will exit.";
             goto leave;
@@ -441,7 +441,7 @@
         goto leave;
       }
       mainClass = LoadClass(env, classname);
-      if(mainClass == NULL) { /* exception occured */
+      if(mainClass == NULL) { /* exception occurred */
         ReportExceptionDescription(env);
         message = "Could not find the main class. Program will exit.";
         goto leave;
--- a/hotspot/src/os/linux/launcher/java_md.h	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os/linux/launcher/java_md.h	Thu Mar 12 18:16:36 2009 -0700
@@ -47,7 +47,7 @@
 #ifdef JAVA_ARGS
 /*
  * ApplicationHome is prepended to each of these entries; the resulting
- * strings are concatenated (seperated by PATH_SEPARATOR) and used as the
+ * strings are concatenated (separated by PATH_SEPARATOR) and used as the
  * value of -cp option to the launcher.
  */
 #ifndef APP_CLASSPATH
--- a/hotspot/src/os/linux/vm/perfMemory_linux.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os/linux/vm/perfMemory_linux.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -192,7 +192,7 @@
 // check if the given path is considered a secure directory for
 // the backing store files. Returns true if the directory exists
 // and is considered a secure location. Returns false if the path
-// is a symbolic link or if an error occured.
+// is a symbolic link or if an error occurred.
 //
 static bool is_directory_secure(const char* path) {
   struct stat statbuf;
--- a/hotspot/src/os/solaris/launcher/java.c	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os/solaris/launcher/java.c	Thu Mar 12 18:16:36 2009 -0700
@@ -419,7 +419,7 @@
             goto leave;
         }
         mainClass = LoadClass(env, classname);
-        if(mainClass == NULL) { /* exception occured */
+        if(mainClass == NULL) { /* exception occurred */
             ReportExceptionDescription(env);
             message = "Could not find the main class.  Program will exit.";
             goto leave;
@@ -441,7 +441,7 @@
         goto leave;
       }
       mainClass = LoadClass(env, classname);
-      if(mainClass == NULL) { /* exception occured */
+      if(mainClass == NULL) { /* exception occurred */
         ReportExceptionDescription(env);
         message = "Could not find the main class. Program will exit.";
         goto leave;
--- a/hotspot/src/os/solaris/launcher/java_md.h	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os/solaris/launcher/java_md.h	Thu Mar 12 18:16:36 2009 -0700
@@ -47,7 +47,7 @@
 #ifdef JAVA_ARGS
 /*
  * ApplicationHome is prepended to each of these entries; the resulting
- * strings are concatenated (seperated by PATH_SEPARATOR) and used as the
+ * strings are concatenated (separated by PATH_SEPARATOR) and used as the
  * value of -cp option to the launcher.
  */
 #ifndef APP_CLASSPATH
--- a/hotspot/src/os/solaris/vm/perfMemory_solaris.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os/solaris/vm/perfMemory_solaris.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -194,7 +194,7 @@
 // check if the given path is considered a secure directory for
 // the backing store files. Returns true if the directory exists
 // and is considered a secure location. Returns false if the path
-// is a symbolic link or if an error occured.
+// is a symbolic link or if an error occurred.
 //
 static bool is_directory_secure(const char* path) {
   struct stat statbuf;
--- a/hotspot/src/os/windows/vm/perfMemory_windows.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os/windows/vm/perfMemory_windows.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -195,7 +195,7 @@
 // check if the given path is considered a secure directory for
 // the backing store files. Returns true if the directory exists
 // and is considered a secure location. Returns false if the path
-// is a symbolic link or if an error occured.
+// is a symbolic link or if an error occurred.
 //
 static bool is_directory_secure(const char* path) {
 
@@ -994,7 +994,7 @@
     return false;
   }
 
-  // if running on windows 2000 or later, set the automatic inheritence
+  // if running on windows 2000 or later, set the automatic inheritance
   // control flags.
   SetSecurityDescriptorControlFnPtr _SetSecurityDescriptorControl;
   _SetSecurityDescriptorControl = (SetSecurityDescriptorControlFnPtr)
@@ -1002,7 +1002,7 @@
                       "SetSecurityDescriptorControl");
 
   if (_SetSecurityDescriptorControl != NULL) {
-    // We do not want to further propogate inherited DACLs, so making them
+    // We do not want to further propagate inherited DACLs, so making them
     // protected prevents that.
     if (!_SetSecurityDescriptorControl(pSD, SE_DACL_PROTECTED,
                                             SE_DACL_PROTECTED)) {
--- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -532,7 +532,7 @@
     if (oldAct.sa_sigaction != signalHandler) {
       void* sighand = oldAct.sa_sigaction ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction)
                                           : CAST_FROM_FN_PTR(void*, oldAct.sa_handler);
-      warning("Unexpected Signal %d occured under user-defined signal handler " INTPTR_FORMAT, sig, (intptr_t)sighand);
+      warning("Unexpected Signal %d occurred under user-defined signal handler " INTPTR_FORMAT, sig, (intptr_t)sighand);
     }
   }
 
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -299,14 +299,18 @@
 
 }
 
+#endif // AMD64
+
 bool os::supports_sse() {
+#ifdef AMD64
+  return true;
+#else
   if (sse_status == SSE_UNKNOWN)
     check_for_sse_support();
   return sse_status == SSE_SUPPORTED;
+#endif // AMD64
 }
 
-#endif // AMD64
-
 bool os::is_allocatable(size_t bytes) {
 #ifdef AMD64
   return true;
@@ -690,7 +694,7 @@
     if (oldAct.sa_sigaction != signalHandler) {
       void* sighand = oldAct.sa_sigaction ? CAST_FROM_FN_PTR(void*,  oldAct.sa_sigaction)
                                           : CAST_FROM_FN_PTR(void*, oldAct.sa_handler);
-      warning("Unexpected Signal %d occured under user-defined signal handler %#lx", sig, (long)sighand);
+      warning("Unexpected Signal %d occurred under user-defined signal handler %#lx", sig, (long)sighand);
     }
   }
 
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,8 +41,9 @@
   static void  fence_bootstrap              ();
 
   static void setup_fpu();
+#endif // AMD64
+
   static bool supports_sse();
-#endif // AMD64
 
   static bool is_allocatable(size_t bytes);
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/Makefile	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,75 @@
+#
+# Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+#  
+#
+PKGLIST = \
+com.sun.hotspot.tools.compiler
+#END PKGLIST
+
+FILELIST = com/sun/hotspot/tools/compiler/*.java
+
+ifneq "x$(ALT_BOOTDIR)" "x"
+  BOOTDIR := $(ALT_BOOTDIR)
+endif
+
+ifeq "x$(BOOTDIR)" "x"
+  JDK_HOME := $(shell dirname $(shell which java))/..
+else
+  JDK_HOME := $(BOOTDIR)
+endif
+
+isUnix := $(shell test -r c:/; echo $$?)
+
+ifeq "$(isUnix)" "1"
+    CPS := :
+else
+    CPS := ";"
+endif
+
+SRC_DIR    = src
+BUILD_DIR  = build
+OUTPUT_DIR = $(BUILD_DIR)/classes
+
+# gnumake 3.78.1 does not accept the *s, 
+# so use the shell to expand them
+ALLFILES := $(patsubst %,$(SRC_DIR)/%,$(FILELIST))
+ALLFILES := $(shell /bin/ls $(ALLFILES))
+
+JAVAC = $(JDK_HOME)/bin/javac
+JAR = $(JDK_HOME)/bin/jar
+
+# Tagging it on because there's no reason not to run it
+all: logc.jar
+
+logc.jar: filelist manifest.mf
+	@mkdir -p $(OUTPUT_DIR)
+	$(JAVAC) -source 1.5 -deprecation -sourcepath $(SRC_DIR) -d $(OUTPUT_DIR) @filelist
+	$(JAR) cvfm logc.jar manifest.mf -C $(OUTPUT_DIR) com
+
+.PHONY: filelist
+filelist: $(ALLFILES)
+	@rm -f $@
+	@echo $(ALLFILES) > $@
+
+clean::
+	rm -rf filelist logc.jar
+	rm -rf $(BUILD_DIR)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/README	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,18 @@
+This is a very rough tool for parsing -XX:+LogCompilation output.
+It's main purpose is to recreate output similar to
+-XX:+PrintCompilation -XX:+PrintInlining output from a debug JVM.  It
+requires a 1.5 JDK to build and simply typing make should build it.
+
+It produces a jar file, logc.jar, that can be run on the
+hotspot.log from LogCompilation output like this:
+
+  java -jar logc.jar hotspot.log
+
+This will produce something like the normal PrintCompilation output.
+Adding the -i option with also report inlining like PrintInlining.
+
+More information about the LogCompilation output can be found at 
+
+http://wikis.sun.com/display/HotSpotInternals/LogCompilation+overview
+http://wikis.sun.com/display/HotSpotInternals/PrintCompilation
+http://wikis.sun.com/display/HotSpotInternals/LogCompilation+tool
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/manifest.mf	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,1 @@
+Main-Class: com.sun.hotspot.tools.compiler.LogCompilation
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/BasicLogEvent.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+
+/**
+ *
+ * @author never
+ */
+public abstract class BasicLogEvent implements LogEvent {
+
+    protected final String id;
+    protected final double start;
+    protected double end;
+    protected Compilation compilation;
+
+    BasicLogEvent(double start, String id) {
+        this.start = start;
+        this.end = start;
+        this.id = id;
+    }
+
+    public double getStart() {
+        return start;
+    }
+
+    public double getEnd() {
+        return end;
+    }
+
+    public void setEnd(double end) {
+        this.end = end;
+    }
+
+    public double getElapsedTime() {
+        return ((int) ((getEnd() - getStart()) * 1000)) / 1000.0;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public Compilation getCompilation() {
+        return compilation;
+    }
+
+    public void setCompilation(Compilation compilation) {
+        this.compilation = compilation;
+    }
+
+    abstract public void print(PrintStream stream);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+
+public class CallSite {
+
+    private int bci;
+    private Method method;
+    private int count;
+    private String receiver;
+    private int receiver_count;
+    private String reason;
+    private List<CallSite> calls;
+
+    CallSite() {
+    }
+
+    CallSite(int bci, Method m) {
+        this.bci = bci;
+        this.method = m;
+    }
+
+    void add(CallSite site) {
+        if (getCalls() == null) {
+            setCalls(new ArrayList<CallSite>());
+        }
+        getCalls().add(site);
+    }
+
+    CallSite last() {
+        return last(-1);
+    }
+
+    CallSite last(int fromEnd) {
+        return getCalls().get(getCalls().size() + fromEnd);
+    }
+
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        if (getReason() == null) {
+            sb.append("  @ " + getBci() + " " + getMethod());
+        } else {
+            sb.append("- @ " + getBci() + " " + getMethod() + " " + getReason());
+        }
+        sb.append("\n");
+        if (getCalls() != null) {
+            for (CallSite site : getCalls()) {
+                sb.append(site);
+                sb.append("\n");
+            }
+        }
+        return sb.toString();
+    }
+
+    public void print(PrintStream stream) {
+        print(stream, 0);
+    }
+
+    void emit(PrintStream stream, int indent) {
+        for (int i = 0; i < indent; i++) {
+            stream.print(' ');
+        }
+    }
+    private static boolean compat = true;
+
+    public void print(PrintStream stream, int indent) {
+        emit(stream, indent);
+        String m = getMethod().getHolder().replace('/', '.') + "::" + getMethod().getName();
+        if (getReason() == null) {
+            stream.println("  @ " + getBci() + " " + m + " (" + getMethod().getBytes() + " bytes)");
+
+        } else {
+            if (isCompat()) {
+                stream.println("  @ " + getBci() + " " + m + " " + getReason());
+            } else {
+                stream.println("- @ " + getBci() + " " + m +
+                        " (" + getMethod().getBytes() + " bytes) " + getReason());
+            }
+        }
+        if (getReceiver() != null) {
+            emit(stream, indent + 3);
+            //                 stream.println("type profile " + method.holder + " -> " + receiver + " (" +
+            //                                receiver_count + "/" + count + "," + (receiver_count * 100 / count) + "%)");
+            stream.println("type profile " + getMethod().getHolder() + " -> " + getReceiver() + " (" +
+                    (getReceiverCount() * 100 / getCount()) + "%)");
+        }
+        if (getCalls() != null) {
+            for (CallSite site : getCalls()) {
+                site.print(stream, indent + 2);
+            }
+        }
+    }
+
+    public int getBci() {
+        return bci;
+    }
+
+    public void setBci(int bci) {
+        this.bci = bci;
+    }
+
+    public Method getMethod() {
+        return method;
+    }
+
+    public void setMethod(Method method) {
+        this.method = method;
+    }
+
+    public int getCount() {
+        return count;
+    }
+
+    public void setCount(int count) {
+        this.count = count;
+    }
+
+    public String getReceiver() {
+        return receiver;
+    }
+
+    public void setReceiver(String receiver) {
+        this.receiver = receiver;
+    }
+
+    public int getReceiverCount() {
+        return receiver_count;
+    }
+
+    public void setReceiver_count(int receiver_count) {
+        this.receiver_count = receiver_count;
+    }
+
+    public String getReason() {
+        return reason;
+    }
+
+    public void setReason(String reason) {
+        this.reason = reason;
+    }
+
+    public List<CallSite> getCalls() {
+        return calls;
+    }
+
+    public void setCalls(List<CallSite> calls) {
+        this.calls = calls;
+    }
+
+    public static boolean isCompat() {
+        return compat;
+    }
+
+    public static void setCompat(boolean aCompat) {
+        compat = aCompat;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Compilation.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,236 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+
+public class Compilation implements LogEvent {
+
+    private int id;
+    private boolean osr;
+    private Method method;
+    private CallSite call = new CallSite();
+    private int osrBci;
+    private String icount;
+    private String bcount;
+    private String special;
+    private double start;
+    private double end;
+    private int attempts;
+    private NMethod nmethod;
+    private ArrayList<Phase> phases = new ArrayList<Phase>(4);
+    private String failureReason;
+
+    Compilation(int id) {
+        this.id = id;
+    }
+
+    Phase getPhase(String s) {
+        for (Phase p : getPhases()) {
+            if (p.getName().equals(s)) {
+                return p;
+            }
+        }
+        return null;
+    }
+
+    double getRegallocTime() {
+        return getPhase("regalloc").getElapsedTime();
+    }
+
+    public double getStart() {
+        return start;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append(getId());
+        sb.append(" ");
+        sb.append(getMethod());
+        sb.append(" ");
+        sb.append(getIcount());
+        sb.append("+");
+        sb.append(getBcount());
+        sb.append("\n");
+        for (CallSite site : getCall().getCalls()) {
+            sb.append(site);
+            sb.append("\n");
+        }
+        return sb.toString();
+    }
+
+    public void printShort(PrintStream stream) {
+        if (getMethod() == null) {
+            stream.println(getSpecial());
+        } else {
+            int bc = isOsr() ? getOsr_bci() : -1;
+            stream.print(getId() + getMethod().decodeFlags(bc) + getMethod().format(bc));
+        }
+    }
+
+    public void print(PrintStream stream) {
+        print(stream, 0, false);
+    }
+
+    public void print(PrintStream stream, boolean printInlining) {
+        print(stream, 0, printInlining);
+    }
+
+    public void print(PrintStream stream, int indent, boolean printInlining) {
+        if (getMethod() == null) {
+            stream.println(getSpecial());
+        } else {
+            int bc = isOsr() ? getOsr_bci() : -1;
+            stream.print(getId() + getMethod().decodeFlags(bc) + getMethod().format(bc));
+            stream.println();
+            if (getFailureReason() != null) {
+                stream.println("COMPILE FAILED " + getFailureReason());
+            }
+            if (printInlining && call.getCalls() != null) {
+                for (CallSite site : call.getCalls()) {
+                    site.print(stream, indent + 2);
+                }
+            }
+        }
+    }
+
+    public int getId() {
+        return id;
+    }
+
+    public void setId(int id) {
+        this.id = id;
+    }
+
+    public boolean isOsr() {
+        return osr;
+    }
+
+    public void setOsr(boolean osr) {
+        this.osr = osr;
+    }
+
+    public int getOsr_bci() {
+        return osrBci;
+    }
+
+    public void setOsr_bci(int osrBci) {
+        this.osrBci = osrBci;
+    }
+
+    public String getIcount() {
+        return icount;
+    }
+
+    public void setICount(String icount) {
+        this.icount = icount;
+    }
+
+    public String getBcount() {
+        return bcount;
+    }
+
+    public void setBCount(String bcount) {
+        this.bcount = bcount;
+    }
+
+    public String getSpecial() {
+        return special;
+    }
+
+    public void setSpecial(String special) {
+        this.special = special;
+    }
+
+    public void setStart(double start) {
+        this.start = start;
+    }
+
+    public double getEnd() {
+        return end;
+    }
+
+    public void setEnd(double end) {
+        this.end = end;
+    }
+
+    public int getAttempts() {
+        return attempts;
+    }
+
+    public void setAttempts(int attempts) {
+        this.attempts = attempts;
+    }
+
+    public NMethod getNMethod() {
+        return nmethod;
+    }
+
+    public void setNMethod(NMethod NMethod) {
+        this.nmethod = NMethod;
+    }
+
+    public ArrayList<Phase> getPhases() {
+        return phases;
+    }
+
+    public void setPhases(ArrayList<Phase> phases) {
+        this.setPhases(phases);
+    }
+
+    public String getFailureReason() {
+        return failureReason;
+    }
+
+    public void setFailureReason(String failureReason) {
+        this.failureReason = failureReason;
+    }
+
+    public Method getMethod() {
+        return method;
+    }
+
+    public void setMethod(Method method) {
+        this.method = method;
+    }
+
+    public CallSite getCall() {
+        return call;
+    }
+
+    public void setCall(CallSite call) {
+        this.call = call;
+    }
+
+    public double getElapsedTime() {
+        return end - start;
+    }
+
+    public Compilation getCompilation() {
+        return this;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Constants.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+interface Constants {
+    static final int  JVM_ACC_PUBLIC        = 0x0001;  /* visible to everyone */
+    static final int  JVM_ACC_PRIVATE       = 0x0002;  /* visible only to the defining class */
+    static final int  JVM_ACC_PROTECTED     = 0x0004;  /* visible to subclasses */
+    static final int  JVM_ACC_STATIC        = 0x0008;  /* instance variable is static */
+    static final int  JVM_ACC_FINAL         = 0x0010;  /* no further subclassing, overriding */
+    static final int  JVM_ACC_SYNCHRONIZED  = 0x0020;  /* wrap method call in monitor lock */
+    static final int  JVM_ACC_SUPER         = 0x0020;  /* funky handling of invokespecial */
+    static final int  JVM_ACC_VOLATILE      = 0x0040;  /* can not cache in registers */
+    static final int  JVM_ACC_BRIDGE        = 0x0040;  /* bridge method generated by compiler */
+    static final int  JVM_ACC_TRANSIENT     = 0x0080;  /* not persistent */
+    static final int  JVM_ACC_VARARGS       = 0x0080;  /* method declared with variable number of args */
+    static final int  JVM_ACC_NATIVE        = 0x0100;  /* implemented in C */
+    static final int  JVM_ACC_INTERFACE     = 0x0200;  /* class is an interface */
+    static final int  JVM_ACC_ABSTRACT      = 0x0400;  /* no definition provided */
+    static final int  JVM_ACC_STRICT        = 0x0800;  /* strict floating point */
+    static final int  JVM_ACC_SYNTHETIC     = 0x1000;  /* compiler-generated class, method or field */
+    static final int  JVM_ACC_ANNOTATION    = 0x2000;  /* annotation type */
+    static final int  JVM_ACC_ENUM          = 0x4000;  /* field is declared as element of enum */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCleanupReader.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.*;
+import java.util.regex.*;
+
+/**
+ * This class is a filter class to deal with malformed XML that used
+ * to be produced by the JVM when generating LogCompilation.  In 1.6
+ * and later releases it shouldn't be required.
+ * @author never
+ */
+
+class LogCleanupReader extends Reader {
+    private Reader reader;
+
+    private char[] buffer = new char[4096];
+
+    private int bufferCount;
+
+    private int bufferOffset;
+
+    private char[] line = new char[1024];
+
+    private int index;
+
+    private int length;
+
+    private char[] one = new char[1];
+
+    LogCleanupReader(Reader r) {
+        reader = r;
+    }
+
+    static final private Matcher pattern = Pattern.compile(".+ compile_id='[0-9]+'.*( compile_id='[0-9]+)").matcher("");
+    static final private Matcher pattern2 = Pattern.compile("' (C[12]) compile_id=").matcher("");
+    static final private Matcher pattern3 = Pattern.compile("'(destroy_vm)/").matcher("");
+
+    private void fill() throws IOException {
+        rawFill();
+        if (length != -1) {
+            boolean changed = false;
+            String s = new String(line, 0, length);
+            String orig = s;
+
+            pattern2.reset(s);
+            if (pattern2.find()) {
+                s = s.substring(0, pattern2.start(1)) + s.substring(pattern2.end(1) + 1);
+                changed = true;
+            }
+
+            pattern.reset(s);
+            if (pattern.lookingAt()) {
+                s = s.substring(0, pattern.start(1)) + s.substring(pattern.end(1) + 1);
+                changed = true;
+            }
+
+            pattern3.reset(s);
+            if (pattern3.find()) {
+                s = s.substring(0, pattern3.start(1)) + s.substring(pattern3.end(1));
+                changed = true;
+            }
+
+            if (changed) {
+                s.getChars(0, s.length(), line, 0);
+                length = s.length();
+            }
+        }
+    }
+
+    private void rawFill() throws IOException {
+        if (bufferCount == -1) {
+            length = -1;
+            return;
+        }
+
+        int i = 0;
+        boolean fillNonEOL = true;
+        outer:
+        while (true) {
+            if (fillNonEOL) {
+                int p;
+                for (p = bufferOffset; p < bufferCount; p++) {
+                    char c = buffer[p];
+                    if (c == '\r' || c == '\n') {
+                        bufferOffset = p;
+                        fillNonEOL = false;
+                        continue outer;
+                    }
+                    if (i >= line.length) {
+                        // copy and enlarge the line array
+                        char[] newLine = new char[line.length * 2];
+                        System.arraycopy(line, 0, newLine, 0, line.length);
+                        line = newLine;
+                    }
+                    line[i++] = c;
+                }
+                bufferOffset = p;
+            } else {
+                int p;
+                for (p = bufferOffset; p < bufferCount; p++) {
+                    char c = buffer[p];
+                    if (c != '\r' && c != '\n') {
+                        bufferOffset = p;
+                        length = i;
+                        index = 0;
+                        return;
+                    }
+                    line[i++] = c;
+                }
+                bufferOffset = p;
+            }
+            if (bufferCount == -1) {
+                if (i == 0) {
+                    length = -1;
+                } else {
+                    length = i;
+                }
+                index = 0;
+                return;
+            }
+            if (bufferOffset != bufferCount) {
+                System.out.println(bufferOffset);
+                System.out.println(bufferCount);
+                throw new InternalError("how did we get here");
+            }
+            // load more data and try again.
+            bufferCount = reader.read(buffer, 0, buffer.length);
+            bufferOffset = 0;
+        }
+    }
+
+    public int read() throws java.io.IOException {
+        read(one, 0, 1);
+        return one[0];
+    }
+
+    public int read(char[] buffer) throws java.io.IOException {
+        return read(buffer, 0, buffer.length);
+    }
+
+    public int read(char[] b, int off, int len) throws java.io.IOException {
+        if (length == -1) {
+            return -1;
+        }
+
+        if (index == length) {
+            fill();
+            if (length == -1) {
+                return -1;
+            }
+        }
+        int n = Math.min(length - index, Math.min(b.length - off, len));
+        // System.out.printf("%d %d %d %d %d\n", index, length, off, len, n);
+        System.arraycopy(line, index, b, off, n);
+        index += n;
+        return n;
+    }
+
+    public long skip(long n) throws java.io.IOException {
+        long result = n;
+        while (n-- > 0) read();
+        return result;
+    }
+
+    public boolean ready() throws java.io.IOException {
+        return reader.ready() || (line != null && length > 0);
+    }
+
+    public boolean markSupported() {
+        return false;
+    }
+
+    public void mark(int unused) throws java.io.IOException {
+        throw new UnsupportedOperationException("mark not supported");
+    }
+
+    public void reset() throws java.io.IOException {
+        reader.reset();
+        line = null;
+        index = 0;
+    }
+
+    public void close() throws java.io.IOException {
+        reader.close();
+        line = null;
+        index = 0;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * The main command line driver of a parser for LogCompilation output.
+ * @author never
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+import java.util.*;
+import org.xml.sax.*;
+import org.xml.sax.helpers.*;
+
+public class LogCompilation extends DefaultHandler implements ErrorHandler, Constants {
+
+    public static void usage(int exitcode) {
+        System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -N ] file1 ...");
+        System.out.println("  -c:   clean up malformed 1.5 xml");
+        System.out.println("  -i:   print inlining decisions");
+        System.out.println("  -S:   print compilation statistics");
+        System.out.println("  -s:   sort events by start time");
+        System.out.println("  -e:   sort events by elapsed time");
+        System.out.println("  -N:   sort events by name and start");
+        System.exit(exitcode);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Comparator<LogEvent> defaultSort = LogParser.sortByStart;
+        boolean statistics = false;
+        boolean printInlining = false;
+        boolean cleanup = false;
+        int index = 0;
+
+        while (args.length > index) {
+            if (args[index].equals("-e")) {
+                defaultSort = LogParser.sortByElapsed;
+                index++;
+            } else if (args[index].equals("-n")) {
+                defaultSort = LogParser.sortByNameAndStart;
+                index++;
+            } else if (args[index].equals("-s")) {
+                defaultSort = LogParser.sortByStart;
+                index++;
+            } else if (args[index].equals("-c")) {
+                cleanup = true;
+                index++;
+            } else if (args[index].equals("-S")) {
+                statistics = true;
+                index++;
+            } else if (args[index].equals("-h")) {
+                usage(0);
+            } else if (args[index].equals("-i")) {
+                printInlining = true;
+                index++;
+            } else {
+                break;
+            }
+        }
+
+        if (index >= args.length) {
+            usage(1);
+        }
+
+        while (index < args.length) {
+            ArrayList<LogEvent> events = LogParser.parse(args[index], cleanup);
+
+            if (statistics) {
+                printStatistics(events, System.out);
+            } else {
+                Collections.sort(events, defaultSort);
+                for (LogEvent c : events) {
+                    if (printInlining && c instanceof Compilation) {
+                        Compilation comp = (Compilation)c;
+                        comp.print(System.out, true);
+                    } else {
+                        c.print(System.out);
+                    }
+                }
+            }
+            index++;
+        }
+    }
+
+    public static void printStatistics(ArrayList<LogEvent> events, PrintStream out) {
+        long cacheSize = 0;
+        long maxCacheSize = 0;
+        int nmethodsCreated = 0;
+        int nmethodsLive = 0;
+        int[] attempts = new int[32];
+        double regallocTime = 0;
+        int maxattempts = 0;
+
+        LinkedHashMap<String, Double> phaseTime = new LinkedHashMap<String, Double>(7);
+        LinkedHashMap<String, Integer> phaseNodes = new LinkedHashMap<String, Integer>(7);
+        double elapsed = 0;
+
+        for (LogEvent e : events) {
+            if (e instanceof Compilation) {
+                Compilation c = (Compilation) e;
+                c.printShort(out);
+                out.printf(" %6.4f\n", c.getElapsedTime());
+                attempts[c.getAttempts()]++;
+                maxattempts = Math.max(maxattempts,c.getAttempts());
+                elapsed += c.getElapsedTime();
+                for (Phase phase : c.getPhases()) {
+                    out.printf("\t%s %6.4f\n", phase.getName(), phase.getElapsedTime());
+                    Double v = phaseTime.get(phase.getName());
+                    if (v == null) {
+                        v = Double.valueOf(0.0);
+                    }
+                    phaseTime.put(phase.getName(), Double.valueOf(v.doubleValue() + phase.getElapsedTime()));
+
+                    Integer v2 = phaseNodes.get(phase.getName());
+                    if (v2 == null) {
+                        v2 = Integer.valueOf(0);
+                    }
+                    phaseNodes.put(phase.getName(), Integer.valueOf(v2.intValue() + phase.getNodes()));
+                }
+            } else if (e instanceof MakeNotEntrantEvent) {
+                MakeNotEntrantEvent mne = (MakeNotEntrantEvent) e;
+                NMethod nm = mne.getNMethod();
+                if (mne.isZombie()) {
+                    if (nm == null) {
+                        System.err.println(mne.getId());
+                    }
+                    cacheSize -= nm.getSize();
+                    nmethodsLive--;
+                }
+            } else if (e instanceof NMethod) {
+                nmethodsLive++;
+                nmethodsCreated++;
+                NMethod nm = (NMethod) e;
+                cacheSize += nm.getSize();
+                maxCacheSize = Math.max(cacheSize, maxCacheSize);
+            }
+        }
+        out.printf("NMethods: %d created %d live %d bytes (%d peak) in the code cache\n",
+                          nmethodsCreated, nmethodsLive, cacheSize, maxCacheSize);
+        out.println("Phase times:");
+        for (String name : phaseTime.keySet()) {
+            Double v = phaseTime.get(name);
+            Integer v2 = phaseNodes.get(name);
+            out.printf("%20s %6.4f %d\n", name, v.doubleValue(), v2.intValue());
+        }
+        out.printf("%20s %6.4f\n", "total", elapsed);
+
+        if (maxattempts > 0) {
+            out.println("Distribution of regalloc passes:");
+            for (int i = 0; i <= maxattempts; i++) {
+                out.printf("%2d %8d\n", i, attempts[i]);
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogEvent.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+import java.util.*;
+
+public interface LogEvent {
+    public double getStart();
+
+    public double getElapsedTime();
+
+    public Compilation getCompilation();
+
+    public void print(PrintStream stream);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,430 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * A SAX based parser of LogCompilation output from HotSpot.  It takes a complete
+ * @author never
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.FileReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Stack;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import org.xml.sax.Attributes;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class LogParser extends DefaultHandler implements ErrorHandler, Constants {
+
+    static final HashMap<String, String> typeMap;
+    static {
+        typeMap = new HashMap<String, String>();
+        typeMap.put("[I", "int[]");
+        typeMap.put("[C", "char[]");
+        typeMap.put("[Z", "boolean[]");
+        typeMap.put("[L", "Object[]");
+        typeMap.put("[B", "byte[]");
+    }
+
+    static Comparator<LogEvent> sortByStart = new Comparator<LogEvent>() {
+
+        public int compare(LogEvent a, LogEvent b) {
+            double difference = (a.getStart() - b.getStart());
+            if (difference < 0) {
+                return -1;
+            }
+            if (difference > 0) {
+                return 1;
+            }
+            return 0;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            return false;
+        }
+
+        @Override
+        public int hashCode() {
+            return 7;
+        }
+    };
+    static Comparator<LogEvent> sortByNameAndStart = new Comparator<LogEvent>() {
+
+        public int compare(LogEvent a, LogEvent b) {
+            Compilation c1 = a.getCompilation();
+            Compilation c2 = b.getCompilation();
+            if (c1 != null && c2 != null) {
+                int result = c1.getMethod().toString().compareTo(c2.getMethod().toString());
+                if (result != 0) {
+                    return result;
+                }
+            }
+            double difference = (a.getStart() - b.getStart());
+            if (difference < 0) {
+                return -1;
+            }
+            if (difference > 0) {
+                return 1;
+            }
+            return 0;
+        }
+
+        public boolean equals(Object other) {
+            return false;
+        }
+
+        @Override
+        public int hashCode() {
+            return 7;
+        }
+    };
+    static Comparator<LogEvent> sortByElapsed = new Comparator<LogEvent>() {
+
+        public int compare(LogEvent a, LogEvent b) {
+            double difference = (a.getElapsedTime() - b.getElapsedTime());
+            if (difference < 0) {
+                return -1;
+            }
+            if (difference > 0) {
+                return 1;
+            }
+            return 0;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            return false;
+        }
+
+        @Override
+        public int hashCode() {
+            return 7;
+        }
+    };
+
+    private ArrayList<LogEvent> events = new ArrayList<LogEvent>();
+
+    private HashMap<String, String> types = new HashMap<String, String>();
+    private HashMap<String, Method> methods = new HashMap<String, Method>();
+    private LinkedHashMap<String, NMethod> nmethods = new LinkedHashMap<String, NMethod>();
+    private HashMap<String, Compilation> compiles = new HashMap<String, Compilation>();
+    private String failureReason;
+    private int bci;
+    private Stack<CallSite> scopes = new Stack<CallSite>();
+    private Compilation compile;
+    private CallSite site;
+    private Stack<Phase> phaseStack = new Stack<Phase>();
+    private UncommonTrapEvent currentTrap;
+
+    long parseLong(String l) {
+        try {
+            return Long.decode(l).longValue();
+        } catch (NumberFormatException nfe) {
+            int split = l.length() - 8;
+            String s1 = "0x" + l.substring(split);
+            String s2 = l.substring(0, split);
+            long v1 = Long.decode(s1).longValue() & 0xffffffffL;
+            long v2 = (Long.decode(s2).longValue() & 0xffffffffL) << 32;
+            if (!l.equals("0x" + Long.toHexString(v1 + v2))) {
+                System.out.println(l);
+                System.out.println(s1);
+                System.out.println(s2);
+                System.out.println(v1);
+                System.out.println(v2);
+                System.out.println(Long.toHexString(v1 + v2));
+                throw new InternalError("bad conversion");
+            }
+            return v1 + v2;
+        }
+    }
+
+    public static ArrayList<LogEvent> parse(String file, boolean cleanup) throws Exception {
+        return parse(new FileReader(file), cleanup);
+    }
+
+    public static ArrayList<LogEvent> parse(Reader reader, boolean cleanup) throws Exception {
+        // Create the XML input factory
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+
+        // Create the XML LogEvent reader
+        SAXParser p = factory.newSAXParser();
+
+        if (cleanup) {
+            // some versions of the log have slightly malformed XML, so clean it
+            // up before passing it to SAX
+            reader = new LogCleanupReader(reader);
+        }
+
+        LogParser log = new LogParser();
+        p.parse(new InputSource(reader), log);
+
+        // Associate compilations with their NMethods
+        for (NMethod nm : log.nmethods.values()) {
+            Compilation c = log.compiles.get(nm.getId());
+            nm.setCompilation(c);
+            // Native wrappers for methods don't have a compilation
+            if (c != null) {
+                c.setNMethod(nm);
+            }
+        }
+
+        // Initially we want the LogEvent log sorted by timestamp
+        Collections.sort(log.events, sortByStart);
+
+        return log.events;
+    }
+
+    String search(Attributes attr, String name) {
+        return search(attr, name, null);
+    }
+
+    String search(Attributes attr, String name, String defaultValue) {
+        String result = attr.getValue(name);
+        if (result != null) {
+            return result;
+        }
+        if (defaultValue != null) {
+            return defaultValue;
+        }
+        for (int i = 0; i < attr.getLength(); i++) {
+            System.out.println(attr.getQName(i) + " " + attr.getValue(attr.getQName(i)));
+        }
+        throw new InternalError("can't find " + name);
+    }
+    int indent = 0;
+    String compile_id;
+
+    String type(String id) {
+        String result = types.get(id);
+        if (result == null) {
+            throw new InternalError(id);
+        }
+        String remapped = typeMap.get(result);
+        if (remapped != null) {
+            return remapped;
+        }
+        return result;
+    }
+
+    void type(String id, String name) {
+        assert type(id) == null;
+        types.put(id, name);
+    }
+
+    Method method(String id) {
+        Method result = methods.get(id);
+        if (result == null) {
+            throw new InternalError(id);
+        }
+        return result;
+    }
+
+    public String makeId(Attributes atts) {
+        String id = atts.getValue("compile_id");
+        String kind = atts.getValue("kind");
+        if (kind != null && kind.equals("osr")) {
+            id += "%";
+        }
+        return id;
+    }
+
+    @Override
+    public void startElement(String uri,
+            String localName,
+            String qname,
+            Attributes atts) {
+        if (qname.equals("phase")) {
+            Phase p = new Phase(search(atts, "name"),
+                    Double.parseDouble(search(atts, "stamp")),
+                    Integer.parseInt(search(atts, "nodes")));
+            phaseStack.push(p);
+        } else if (qname.equals("phase_done")) {
+            Phase p = phaseStack.pop();
+            p.setEndNodes(Integer.parseInt(search(atts, "nodes")));
+            p.setEnd(Double.parseDouble(search(atts, "stamp")));
+            compile.getPhases().add(p);
+        } else if (qname.equals("task")) {
+            compile = new Compilation(Integer.parseInt(search(atts, "compile_id", "-1")));
+            compile.setStart(Double.parseDouble(search(atts, "stamp")));
+            compile.setICount(search(atts, "count", "0"));
+            compile.setBCount(search(atts, "backedge_count", "0"));
+
+            String method = atts.getValue("method");
+            int space = method.indexOf(' ');
+            method = method.substring(0, space) + "::" +
+                    method.substring(space + 1, method.indexOf(' ', space + 1) + 1);
+            String compiler = atts.getValue("compiler");
+            if (compiler == null) {
+                compiler = "";
+            }
+            String kind = atts.getValue("compile_kind");
+            if (kind == null) {
+                kind = "normal";
+            }
+            if (kind.equals("osr")) {
+                compile.setOsr(true);
+                compile.setOsr_bci(Integer.parseInt(search(atts, "osr_bci")));
+            } else if (kind.equals("c2i")) {
+                compile.setSpecial("--- adapter " + method);
+            } else {
+                compile.setSpecial(compile.getId() + " " + method + " (0 bytes)");
+            }
+            events.add(compile);
+            compiles.put(makeId(atts), compile);
+        } else if (qname.equals("type")) {
+            type(search(atts, "id"), search(atts, "name"));
+        } else if (qname.equals("bc")) {
+            bci = Integer.parseInt(search(atts, "bci"));
+        } else if (qname.equals("klass")) {
+            type(search(atts, "id"), search(atts, "name"));
+        } else if (qname.equals("method")) {
+            String id = search(atts, "id");
+            Method m = new Method();
+            m.setHolder(type(search(atts, "holder")));
+            m.setName(search(atts, "name"));
+            m.setReturnType(type(search(atts, "return")));
+            m.setArguments(search(atts, "arguments", "void"));
+            m.setBytes(search(atts, "bytes"));
+            m.setIICount(search(atts, "iicount"));
+            m.setFlags(search(atts, "flags"));
+            methods.put(id, m);
+        } else if (qname.equals("call")) {
+            site = new CallSite(bci, method(search(atts, "method")));
+            site.setCount(Integer.parseInt(search(atts, "count")));
+            String receiver = atts.getValue("receiver");
+            if (receiver != null) {
+                site.setReceiver(type(receiver));
+                site.setReceiver_count(Integer.parseInt(search(atts, "receiver_count")));
+            }
+            scopes.peek().add(site);
+        } else if (qname.equals("regalloc")) {
+            compile.setAttempts(Integer.parseInt(search(atts, "attempts")));
+        } else if (qname.equals("inline_fail")) {
+            scopes.peek().last().setReason(search(atts, "reason"));
+        } else if (qname.equals("failure")) {
+            failureReason = search(atts, "reason");
+        } else if (qname.equals("task_done")) {
+            compile.setEnd(Double.parseDouble(search(atts, "stamp")));
+            if (Integer.parseInt(search(atts, "success")) == 0) {
+                compile.setFailureReason(failureReason);
+            }
+        } else if (qname.equals("make_not_entrant")) {
+            String id = makeId(atts);
+            NMethod nm = nmethods.get(id);
+            if (nm == null) throw new InternalError();
+            LogEvent e = new MakeNotEntrantEvent(Double.parseDouble(search(atts, "stamp")), id,
+                                                 atts.getValue("zombie") != null, nm);
+            events.add(e);
+        } else if (qname.equals("uncommon_trap")) {
+            String id = atts.getValue("compile_id");
+            if (id != null) {
+                id = makeId(atts);
+                currentTrap = new UncommonTrapEvent(Double.parseDouble(search(atts, "stamp")),
+                        id,
+                        atts.getValue("reason"),
+                        atts.getValue("action"),
+                        Integer.parseInt(search(atts, "count", "0")));
+                events.add(currentTrap);
+            } else {
+                // uncommon trap inserted during parsing.
+                // ignore for now
+            }
+        } else if (qname.equals("jvms")) {
+            // <jvms bci='4' method='java/io/DataInputStream readChar ()C' bytes='40' count='5815' iicount='20815'/>
+            if (currentTrap != null) {
+                currentTrap.addJVMS(atts.getValue("method"), Integer.parseInt(atts.getValue("bci")));
+            } else {
+                System.err.println("Missing uncommon_trap for jvms");
+            }
+        } else if (qname.equals("nmethod")) {
+            String id = makeId(atts);
+            NMethod nm = new NMethod(Double.parseDouble(search(atts, "stamp")),
+                    id,
+                    parseLong(atts.getValue("address")),
+                    parseLong(atts.getValue("size")));
+            nmethods.put(id, nm);
+            events.add(nm);
+        } else if (qname.equals("parse")) {
+            Method m = method(search(atts, "method"));
+            if (scopes.size() == 0) {
+                compile.setMethod(m);
+                scopes.push(compile.getCall());
+            } else {
+                if (site.getMethod() == m) {
+                    scopes.push(site);
+                } else if (scopes.peek().getCalls().size() > 2 && m == scopes.peek().last(-2).getMethod()) {
+                    scopes.push(scopes.peek().last(-2));
+                } else {
+                    System.out.println(site.getMethod());
+                    System.out.println(m);
+                    throw new InternalError("call site and parse don't match");
+                }
+            }
+        }
+    }
+
+    @Override
+    public void endElement(String uri,
+            String localName,
+            String qname) {
+        if (qname.equals("parse")) {
+            indent -= 2;
+            scopes.pop();
+        } else if (qname.equals("uncommon_trap")) {
+            currentTrap = null;
+        } else if (qname.equals("task")) {
+            types.clear();
+            methods.clear();
+            site = null;
+        }
+    }
+
+    @Override
+    public void warning(org.xml.sax.SAXParseException e) {
+        System.err.println(e.getMessage() + " at line " + e.getLineNumber() + ", column " + e.getColumnNumber());
+        e.printStackTrace();
+    }
+
+    @Override
+    public void error(org.xml.sax.SAXParseException e) {
+        System.err.println(e.getMessage() + " at line " + e.getLineNumber() + ", column " + e.getColumnNumber());
+        e.printStackTrace();
+    }
+
+    @Override
+    public void fatalError(org.xml.sax.SAXParseException e) {
+        System.err.println(e.getMessage() + " at line " + e.getLineNumber() + ", column " + e.getColumnNumber());
+        e.printStackTrace();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/MakeNotEntrantEvent.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+
+class MakeNotEntrantEvent extends BasicLogEvent {
+    private final boolean zombie;
+
+    private NMethod nmethod;
+
+    MakeNotEntrantEvent(double s, String i, boolean z, NMethod nm) {
+        super(s, i);
+        zombie = z;
+        nmethod = nm;
+    }
+
+    public NMethod getNMethod() {
+        return nmethod;
+    }
+
+    public void print(PrintStream stream) {
+        if (isZombie()) {
+            stream.printf("%s make_zombie\n", getId());
+        } else {
+            stream.printf("%s make_not_entrant\n", getId());
+        }
+    }
+
+    public boolean isZombie() {
+        return zombie;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Method.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.util.Arrays;
+
+public class Method implements Constants {
+
+    private String holder;
+    private String name;
+    private String returnType;
+    private String arguments;
+    private String bytes;
+    private String iicount;
+    private String flags;
+
+    String decodeFlags(int osr_bci) {
+        int f = Integer.parseInt(getFlags());
+        char[] c = new char[4];
+        Arrays.fill(c, ' ');
+        if (osr_bci >= 0) {
+            c[0] = '%';
+        }
+        if ((f & JVM_ACC_SYNCHRONIZED) != 0) {
+            c[1] = 's';
+        }
+        return new String(c);
+    }
+
+    String format(int osr_bci) {
+        if (osr_bci >= 0) {
+            return getHolder().replace('/', '.') + "::" + getName() + " @ " + osr_bci + " (" + getBytes() + " bytes)";
+        } else {
+            return getHolder().replace('/', '.') + "::" + getName() + " (" + getBytes() + " bytes)";
+        }
+    }
+
+    @Override
+    public String toString() {
+        return getHolder().replace('/', '.') + "::" + getName() + " (" + getBytes() + " bytes)";
+    }
+
+    public String getHolder() {
+        return holder;
+    }
+
+    public void setHolder(String holder) {
+        this.holder = holder;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getReturnType() {
+        return returnType;
+    }
+
+    public void setReturnType(String returnType) {
+        this.returnType = returnType;
+    }
+
+    public String getArguments() {
+        return arguments;
+    }
+
+    public void setArguments(String arguments) {
+        this.arguments = arguments;
+    }
+
+    public String getBytes() {
+        return bytes;
+    }
+
+    public void setBytes(String bytes) {
+        this.bytes = bytes;
+    }
+
+    public String getIICount() {
+        return iicount;
+    }
+
+    public void setIICount(String iicount) {
+        this.iicount = iicount;
+    }
+
+    public String getFlags() {
+        return flags;
+    }
+
+    public void setFlags(String flags) {
+        this.flags = flags;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/NMethod.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+
+public class NMethod extends BasicLogEvent {
+
+    private long address;
+    private long size;
+
+    NMethod(double s, String i, long a, long sz) {
+        super(s, i);
+        address = a;
+        size = sz;
+    }
+
+    public void print(PrintStream out) {
+        // XXX Currently we do nothing
+        // throw new InternalError();
+    }
+
+    public long getAddress() {
+        return address;
+    }
+
+    public void setAddress(long address) {
+        this.address = address;
+    }
+
+    public long getSize() {
+        return size;
+    }
+
+    public void setSize(long size) {
+        this.size = size;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+
+public class Phase extends BasicLogEvent {
+
+    private final int startNodes;
+    private int endNodes;
+
+    Phase(String n, double s, int nodes) {
+        super(s, n);
+        startNodes = nodes;
+    }
+
+    int getNodes() {
+        return getStartNodes();
+    }
+
+    void setEndNodes(int n) {
+        endNodes = n;
+    }
+
+    public String getName() {
+        return getId();
+    }
+
+    public int getStartNodes() {
+        return startNodes;
+    }
+
+    public int getEndNodes() {
+        return endNodes;
+    }
+
+    @Override
+    public void print(PrintStream stream) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/UncommonTrapEvent.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package com.sun.hotspot.tools.compiler;
+
+import java.io.PrintStream;
+
+class UncommonTrapEvent extends BasicLogEvent {
+
+    private final String reason;
+    private final String action;
+    private int count;
+    private String jvms = "";
+
+    UncommonTrapEvent(double s, String i, String r, String a, int c) {
+        super(s, i);
+        reason = r;
+        action = a;
+        count = c;
+    }
+
+
+    public void addJVMS(String method, int bci) {
+        setJvms(getJvms() + "  @" + bci + " " + method + "\n");
+    }
+
+    public void updateCount(UncommonTrapEvent trap) {
+        setCount(Math.max(getCount(), trap.getCount()));
+    }
+
+    public void print(PrintStream stream) {
+        stream.printf("%s uncommon trap %s %s\n", getId(), getReason(), getAction());
+        stream.print(getJvms());
+    }
+
+    public String getReason() {
+        return reason;
+    }
+
+    public String getAction() {
+        return action;
+    }
+
+    public int getCount() {
+        return count;
+    }
+
+    public void setCount(int count) {
+        this.count = count;
+    }
+
+    public String getJvms() {
+        return jvms;
+    }
+
+    public void setJvms(String jvms) {
+        this.jvms = jvms;
+    }
+
+    public void setCompilation(Compilation compilation) {
+        this.compilation = compilation;
+    }
+}
--- a/hotspot/src/share/tools/MakeDeps/BuildConfig.java	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/tools/MakeDeps/BuildConfig.java	Thu Mar 12 18:16:36 2009 -0700
@@ -247,6 +247,7 @@
         sysDefines.add("HOTSPOT_BUILD_USER="+System.getProperty("user.name"));
         sysDefines.add("HOTSPOT_BUILD_TARGET=\\\""+get("Build")+"\\\"");
         sysDefines.add("_JNI_IMPLEMENTATION_");
+        sysDefines.add("HOTSPOT_LIB_ARCH=\\\"i486\\\"");
 
         sysDefines.addAll(defines);
 
--- a/hotspot/src/share/tools/MakeDeps/Database.java	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/tools/MakeDeps/Database.java	Thu Mar 12 18:16:36 2009 -0700
@@ -365,7 +365,7 @@
 
     // HACK ALERT. The compilation of ad_<arch> files is very slow.
     // We want to start compiling them as early as possible. The compilation
-    // order on unix is dependant on the order we emit files here.
+    // order on unix is dependent on the order we emit files here.
     // By sorting the output before emitting it, we expect
     // that ad_<arch> will be compiled early.
     boolean shouldSortObjFiles = true;
--- a/hotspot/src/share/vm/adlc/Doc/Syntax.doc	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/Doc/Syntax.doc	Thu Mar 12 18:16:36 2009 -0700
@@ -88,7 +88,7 @@
                          // these are used for constraints, etc.
 
 alloc_class class1(AX, BX); // form an allocation class of registers
-                            // used by the register allocator for seperate
+                            // used by the register allocator for separate
                             // allocation of target register classes
 
 	3. Pipeline Syntax for Scheduling
@@ -150,7 +150,7 @@
 		b. %}  (block terminator)
 		c. EOF (file terminator)
 
-	4. Each statement must start on a seperate line
+	4. Each statement must start on a separate line
 
 	5. Identifiers cannot contain: (){}%;,"/\
 
--- a/hotspot/src/share/vm/adlc/adlc.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/adlc.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -79,6 +79,7 @@
 
 // Macros
 // Debugging note:  Put a breakpoint on "abort".
+#undef assert
 #define assert(cond, msg) { if (!(cond)) { fprintf(stderr, "assert fails %s %d: %s\n", __FILE__, __LINE__, msg); abort(); }}
 #define max(a, b)   (((a)>(b)) ? (a) : (b))
 
--- a/hotspot/src/share/vm/adlc/adlparse.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/adlparse.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -298,7 +298,7 @@
   rule->count_commutative_op(count);
   if (count > 0) {
     // Clone match rule and swap commutative operation's operands.
-    rule->swap_commutative_op(instr_ident, count, match_rules_cnt);
+    rule->matchrule_swap_commutative_op(instr_ident, count, match_rules_cnt);
   }
 }
 
@@ -2586,7 +2586,7 @@
   while( _curchar != ')' ) {
     // Get information on the left instruction and its operand
     // left-instructions's number
-    intptr_t   left_inst = get_int();
+    int left_inst = get_int();
     // Left-instruction's operand
     skipws();
     if( _curchar != '.' ) {
@@ -2602,7 +2602,7 @@
 
     skipws();
     // Get information on the right instruction and its operand
-    intptr_t right_inst;        // Right-instructions's number
+    int right_inst;        // Right-instructions's number
     if( isdigit(_curchar) ) {
       right_inst = get_int();
       // Right-instruction's operand
@@ -3497,22 +3497,24 @@
 
     // (1)
     // Check if there is a string to pass through to output
-    char *start = _ptr;       // Record start of the next string
-    while ((_curchar != '$') && ((_curchar != '%') || (*(_ptr+1) != '}')) ) {
-      // If at the start of a comment, skip past it
-      if( (_curchar == '/') && ((*(_ptr+1) == '/') || (*(_ptr+1) == '*')) ) {
-        skipws_no_preproc();
-      } else {
-        // ELSE advance to the next character, or start of the next line
-        next_char_or_line();
+    {
+      char *start = _ptr;       // Record start of the next string
+      while ((_curchar != '$') && ((_curchar != '%') || (*(_ptr+1) != '}')) ) {
+        // If at the start of a comment, skip past it
+        if( (_curchar == '/') && ((*(_ptr+1) == '/') || (*(_ptr+1) == '*')) ) {
+          skipws_no_preproc();
+        } else {
+          // ELSE advance to the next character, or start of the next line
+          next_char_or_line();
+        }
       }
-    }
-    // If a string was found, terminate it and record in EncClass
-    if ( start != _ptr ) {
-      *_ptr  = '\0';          // Terminate the string
-      // Add flag to _strings list indicating we should check _rep_vars
-      format->_strings.addName(NameList::_signal2);
-      format->_strings.addName(start);
+      // If a string was found, terminate it and record in EncClass
+      if ( start != _ptr ) {
+        *_ptr  = '\0';          // Terminate the string
+        // Add flag to _strings list indicating we should check _rep_vars
+        format->_strings.addName(NameList::_signal2);
+        format->_strings.addName(start);
+      }
     }
 
     // (2)
@@ -3563,10 +3565,10 @@
             // copy it and record in FormatRule
             if ( _curchar == '$' ) {
               next_char();          // Move past the '$'
-              char* rep_var = get_ident(); // Nil terminate the variable name
-              rep_var = strdup(rep_var);// Copy the string
+              char* next_rep_var = get_ident(); // Nil terminate the variable name
+              next_rep_var = strdup(next_rep_var);// Copy the string
               *_ptr   = _curchar;     // and replace Nil with original character
-              format->_rep_vars.addName(rep_var);
+              format->_rep_vars.addName(next_rep_var);
               // Add flag to _strings list indicating we should check _rep_vars
               format->_strings.addName(NameList::_signal);
             }
@@ -3714,13 +3716,13 @@
           parse_err(SYNERR, "identifier expected at %c\n", _curchar);
           continue;
         }                            // Check that you have a valid operand
-        const Form *form = instr->_localNames[ident2];
-        if (!form) {
+        const Form *form2 = instr->_localNames[ident2];
+        if (!form2) {
           parse_err(SYNERR, "operand name expected at %s\n", ident2);
           continue;
         }
-        oper = form->is_operand();
-        if (oper == NULL && !form->is_opclass()) {
+        oper = form2->is_operand();
+        if (oper == NULL && !form2->is_opclass()) {
           parse_err(SYNERR, "operand name expected at %s\n", ident2);
           continue;
         }                            // Add operand to list
@@ -4271,7 +4273,7 @@
   int           result;           // Storage for integer result
 
   if( _curline == NULL )          // Return NULL at EOF.
-    return NULL;
+    return 0;
 
   skipws();                       // Skip whitespace before identifier
   start = end = _ptr;             // Start points at first character
@@ -4553,7 +4555,7 @@
 
 //---------------------------ensure_start_of_line------------------------------
 // A preprocessor directive has been encountered.  Be sure it has fallen at
-// the begining of a line, or else report an error.
+// the beginning of a line, or else report an error.
 void ADLParser::ensure_start_of_line(void) {
   if (_curchar == '\n') { next_line(); return; }
   assert( _ptr >= _curline && _ptr < _curline+strlen(_curline),
--- a/hotspot/src/share/vm/adlc/archDesc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/archDesc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -436,10 +436,12 @@
   if ((oper->_matrule) && (oper->_matrule->_lChild == NULL) &&
       (oper->_matrule->_rChild == NULL)) {
 
-    const Form *form = _globalNames[oper->_matrule->_opType];
-    if ((form) && form->is_operand() &&
-        (form->ideal_only() == false)) {
-      add_chain_rule_entry(oper->_matrule->_opType, oper->cost(), oper->_ident);
+    {
+      const Form *form = _globalNames[oper->_matrule->_opType];
+      if ((form) && form->is_operand() &&
+          (form->ideal_only() == false)) {
+        add_chain_rule_entry(oper->_matrule->_opType, oper->cost(), oper->_ident);
+      }
     }
     // Check for additional chain rules
     if (oper->_matrule->_next) {
@@ -1015,12 +1017,12 @@
   int idealIndex = 0;
   for (idealIndex = 1; idealIndex < _last_machine_leaf; ++idealIndex) {
     const char *idealName = NodeClassNames[idealIndex];
-    _idealIndex.Insert((void*)idealName, (void*)idealIndex);
+    _idealIndex.Insert((void*) idealName, (void*) (intptr_t) idealIndex);
   }
   for ( idealIndex = _last_machine_leaf+1;
         idealIndex < _last_opcode; ++idealIndex) {
     const char *idealName = NodeClassNames[idealIndex];
-    _idealIndex.Insert((void*)idealName, (void*)idealIndex);
+    _idealIndex.Insert((void*) idealName, (void*) (intptr_t) idealIndex);
   }
 
 }
--- a/hotspot/src/share/vm/adlc/dfa.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/dfa.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -870,7 +870,7 @@
 }
 
 // Print out the dictionary contents as key-value pairs
-static void dumpekey(const void* key)  { fprintf(stdout, "%s", key); }
+static void dumpekey(const void* key)  { fprintf(stdout, "%s", (char*) key); }
 static void dumpexpr(const void* expr) { fflush(stdout); ((Expr*)expr)->print(); }
 
 void ExprDict::dump() {
@@ -1020,7 +1020,7 @@
 }
 
 // Print out the dictionary contents as key-value pairs
-static void print_key (const void* key)              { fprintf(stdout, "%s", key); }
+static void print_key (const void* key)              { fprintf(stdout, "%s", (char*) key); }
 static void print_production(const void* production) { fflush(stdout); ((Production*)production)->print(); }
 
 void ProductionState::print() {
--- a/hotspot/src/share/vm/adlc/dict2.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/dict2.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -275,7 +275,7 @@
 // Convert string to hash key.  This algorithm implements a universal hash
 // function with the multipliers frozen (ok, so it's not universal).  The
 // multipliers (and allowable characters) are all odd, so the resultant sum
-// is odd - guarenteed not divisible by any power of two, so the hash tables
+// is odd - guaranteed not divisible by any power of two, so the hash tables
 // can be any power of two with good results.  Also, I choose multipliers
 // that have only 2 bits set (the low is always set to be odd) so
 // multiplication requires only shifts and adds.  Characters are required to
@@ -296,7 +296,7 @@
 }
 
 //------------------------------hashptr--------------------------------------
-// Slimey cheap hash function; no guarenteed performance.  Better than the
+// Slimey cheap hash function; no guaranteed performance.  Better than the
 // default for pointers, especially on MS-DOS machines.
 int hashptr(const void *key) {
 #ifdef __TURBOC__
@@ -306,7 +306,7 @@
 #endif
 }
 
-// Slimey cheap hash function; no guarenteed performance.
+// Slimey cheap hash function; no guaranteed performance.
 int hashkey(const void *key) {
   return (int)((intptr_t)key);
 }
--- a/hotspot/src/share/vm/adlc/dict2.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/dict2.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -89,10 +89,10 @@
 
 // Hashing functions
 int hashstr(const void *s);        // Nice string hash
-// Slimey cheap hash function; no guarenteed performance.  Better than the
+// Slimey cheap hash function; no guaranteed performance.  Better than the
 // default for pointers, especially on MS-DOS machines.
 int hashptr(const void *key);
-// Slimey cheap hash function; no guarenteed performance.
+// Slimey cheap hash function; no guaranteed performance.
 int hashkey(const void *key);
 
 // Key comparators
--- a/hotspot/src/share/vm/adlc/filebuff.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/filebuff.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,8 @@
 // FILEBUFF.CPP - Routines for handling a parser file buffer
 #include "adlc.hpp"
 
+using namespace std;
+
 //------------------------------FileBuff---------------------------------------
 // Create a new parsing buffer
 FileBuff::FileBuff( BufferedFile *fptr, ArchDesc& archDesc) : _fp(fptr), _AD(archDesc) {
@@ -48,10 +50,10 @@
     file_error(SEMERR, 0, "Buffer allocation failed\n");
     exit(1);                    // Exit on allocation failure
   }
-  *_bigbuf = '\n';               // Lead with a sentinal newline
-  _buf = _bigbuf+1;                     // Skip sentinal
+  *_bigbuf = '\n';               // Lead with a sentinel newline
+  _buf = _bigbuf+1;                     // Skip sentinel
   _bufmax = _buf;               // Buffer is empty
-  _bufeol = _bigbuf;              // _bufeol points at sentinal
+  _bufeol = _bigbuf;              // _bufeol points at sentinel
   _filepos = -1;                 // filepos is in sync with _bufeol
   _bufoff = _offset = 0L;       // Offset at file start
 
@@ -60,8 +62,8 @@
     file_error(SEMERR, 0, "File read error, no input read\n");
     exit(1);                     // Exit on read error
   }
-  *_bufmax = '\n';               // End with a sentinal new-line
-  *(_bufmax+1) = '\0';           // Then end with a sentinal NULL
+  *_bufmax = '\n';               // End with a sentinel new-line
+  *(_bufmax+1) = '\0';           // Then end with a sentinel NULL
 }
 
 //------------------------------~FileBuff--------------------------------------
@@ -79,7 +81,7 @@
 
   _linenum++;
   retval = ++_bufeol;      // return character following end of previous line
-  if (*retval == '\0') return NULL; // Check for EOF sentinal
+  if (*retval == '\0') return NULL; // Check for EOF sentinel
   // Search for newline character which must end each line
   for(_filepos++; *_bufeol != '\n'; _bufeol++)
     _filepos++;                    // keep filepos in sync with _bufeol
@@ -217,7 +219,7 @@
     off = expandtab(os,off,*s++,'-','-');
   if( i == len ) os << '^';     // Mark end of region
   os << '\n';                   // End of marked line
-  return 0L;                    // All done
+  return 0;                     // All done
 }
 
 //------------------------------print------------------------------------------
--- a/hotspot/src/share/vm/adlc/filebuff.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/filebuff.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -26,6 +26,7 @@
 #include <iostream>
 
 using namespace std;
+
 // STRUCTURE FOR HANDLING INPUT AND OUTPUT FILES
 typedef struct {
   const char *_name;
@@ -36,7 +37,7 @@
 
 //------------------------------FileBuff--------------------------------------
 // This class defines a nicely behaved buffer of text.  Entire file of text
-// is read into buffer at creation, with sentinals at start and end.
+// is read into buffer at creation, with sentinels at start and end.
 class FileBuff {
   friend class FileBuffRegion;
  private:
@@ -45,8 +46,8 @@
   long  _bufoff;                // Start of buffer file offset
 
   char *_buf;                   // The buffer itself.
-  char *_bigbuf;                // The buffer plus sentinals; actual heap area
-  char *_bufmax;                // A pointer to the buffer end sentinal
+  char *_bigbuf;                // The buffer plus sentinels; actual heap area
+  char *_bufmax;                // A pointer to the buffer end sentinel
   char *_bufeol;                // A pointer to the last complete line end
 
   int   _err;                   // Error flag for file seek/read operations
@@ -72,7 +73,7 @@
 
   // This converts a pointer into the buffer to a file offset.  It only works
   // when the pointer is valid (i.e. just obtained from getline()).
-  int getoff(const char *s) { return _bufoff+(int)(s-_buf); }
+  long getoff(const char* s) { return _bufoff + (s - _buf); }
 };
 
 //------------------------------FileBuffRegion---------------------------------
@@ -95,8 +96,6 @@
   FileBuffRegion *copy();                   // Deep copy
   FileBuffRegion *merge(FileBuffRegion*); // Merge 2 regions; delete input
 
-//  void print(std::ostream&);
-//  friend std::ostream& operator<< (std::ostream&, FileBuffRegion&);
   void print(ostream&);
   friend ostream& operator<< (ostream&, FileBuffRegion&);
 };
--- a/hotspot/src/share/vm/adlc/forms.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/forms.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -70,6 +70,7 @@
   else return (_iter <_cur-1 ? _names[++_iter] : NULL);
 }
 const char  *NameList::current() { return (_iter < _cur ? _names[_iter] : NULL); }
+const char  *NameList::peek(int skip) { return (_iter + skip < _cur ? _names[_iter + skip] : NULL); }
 
 // Return 'true' if current entry is signal
 bool  NameList::current_is_signal() {
@@ -248,11 +249,13 @@
 // True if 'opType', an ideal name, loads or stores.
 Form::DataType Form::is_load_from_memory(const char *opType) const {
   if( strcmp(opType,"LoadB")==0 )  return Form::idealB;
+  if( strcmp(opType,"LoadUB")==0 )  return Form::idealB;
   if( strcmp(opType,"LoadUS")==0 )  return Form::idealC;
   if( strcmp(opType,"LoadD")==0 )  return Form::idealD;
   if( strcmp(opType,"LoadD_unaligned")==0 )  return Form::idealD;
   if( strcmp(opType,"LoadF")==0 )  return Form::idealF;
   if( strcmp(opType,"LoadI")==0 )  return Form::idealI;
+  if( strcmp(opType,"LoadUI2L")==0 )  return Form::idealI;
   if( strcmp(opType,"LoadKlass")==0 )  return Form::idealP;
   if( strcmp(opType,"LoadNKlass")==0 ) return Form::idealN;
   if( strcmp(opType,"LoadL")==0 )  return Form::idealL;
@@ -370,7 +373,7 @@
 }
 
 // Print out the dictionary contents as key-value pairs
-static void dumpkey (const void* key)  { fprintf(stdout, "%s", key); }
+static void dumpkey (const void* key)  { fprintf(stdout, "%s", (char*) key); }
 static void dumpform(const void* form) { fflush(stdout); ((Form*)form)->dump(); }
 
 void FormDict::dump() {
--- a/hotspot/src/share/vm/adlc/forms.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/forms.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -124,7 +124,7 @@
 public:
   // Public Data
   Form *_next;                     // Next pointer for form lists
-  long  _linenum;                  // Line number for debugging
+  int   _linenum;                  // Line number for debugging
 
   // Dynamic type check for common forms.
   virtual OpClassForm   *is_opclass()     const;
@@ -342,6 +342,7 @@
   void  reset();                   // Reset iteration
   const char *iter();              // after reset(), first element : else next
   const char *current();           // return current element in iteration.
+  const char *peek(int skip = 1);  // returns element + skip in iteration if there is one
 
   bool  current_is_signal();       // Return 'true' if current entry is signal
   bool  is_signal(const char *entry); // Return true if entry is a signal
--- a/hotspot/src/share/vm/adlc/formsopt.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/formsopt.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1998-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -273,13 +273,13 @@
   for(_regDefs.reset(); (name = _regDefs.iter()) != NULL;) {
     int rnum = ((RegDef*)_regDef[name])->register_num();
     if( (rnum >> 5) == wordnum )
-      word |= (1L<<(rnum&31));
+      word |= (1 << (rnum & 31));
   }
   if( stack_also ) {
     // Now also collect stack bits
     for( int i = 0; i < 32; i++ )
       if( wordnum*32+i >= RegisterForm::_reg_ctr )
-        word |= (1L<<i);
+        word |= (1 << i);
   }
 
   return word;
@@ -592,10 +592,10 @@
                                  int input) {
   if( position > _max_position ) _max_position = position;
 
-  _parent.addName((char *)parent);
-  _position.addName((char *)position);
+  _parent.addName((char*) (intptr_t) parent);
+  _position.addName((char*) (intptr_t) position);
   _instrs.addName(name);
-  _input.addName((char *)input);
+  _input.addName((char*) (intptr_t) input);
 }
 
 // Access info about instructions in the peep-match rule
@@ -603,7 +603,7 @@
   return _max_position;
 }
 
-const char *PeepMatch::instruction_name(intptr_t position) {
+const char *PeepMatch::instruction_name(int position) {
   return _instrs.name(position);
 }
 
@@ -615,11 +615,11 @@
   _input.reset();
 }
 
-void  PeepMatch::next_instruction( intptr_t &parent, intptr_t &position, const char * &name, intptr_t &input ){
-  parent   = (intptr_t)_parent.iter();
-  position = (intptr_t)_position.iter();
+void  PeepMatch::next_instruction(int &parent, int &position, const char* &name, int &input) {
+  parent   = (int) (intptr_t) _parent.iter();
+  position = (int) (intptr_t) _position.iter();
   name     = _instrs.iter();
-  input    = (intptr_t)_input.iter();
+  input    = (int) (intptr_t) _input.iter();
 }
 
 // 'true' if current position in iteration is a placeholder, not matched.
@@ -637,15 +637,15 @@
 }
 
 //------------------------------PeepConstraint---------------------------------
-PeepConstraint::PeepConstraint(intptr_t  left_inst,  char *left_op, char *relation,
-                               intptr_t  right_inst, char *right_op)
+PeepConstraint::PeepConstraint(int left_inst,  char* left_op, char* relation,
+                               int right_inst, char* right_op)
   : _left_inst(left_inst), _left_op(left_op), _relation(relation),
     _right_inst(right_inst), _right_op(right_op), _next(NULL) {}
 PeepConstraint::~PeepConstraint() {
 }
 
 // Check if constraints use instruction at position
-bool PeepConstraint::constrains_instruction(intptr_t position) {
+bool PeepConstraint::constrains_instruction(int position) {
   // Check local instruction constraints
   if( _left_inst  == position ) return true;
   if( _right_inst == position ) return true;
@@ -692,7 +692,7 @@
 }
 void  PeepReplace::add_operand( int inst_num, char *inst_operand ) {
   _instruction.add_signal();
-  _operand_inst_num.addName((char*)inst_num);
+  _operand_inst_num.addName((char*) (intptr_t) inst_num);
   _operand_op_name.addName(inst_operand);
 }
 
@@ -702,15 +702,15 @@
   _operand_inst_num.reset();
   _operand_op_name.reset();
 }
-void  PeepReplace::next_instruction(const char * &inst){
+void  PeepReplace::next_instruction(const char* &inst){
   inst                     = _instruction.iter();
-  intptr_t   inst_num      = (intptr_t)_operand_inst_num.iter();
-  const char *inst_operand = _operand_op_name.iter();
+  int         inst_num     = (int) (intptr_t) _operand_inst_num.iter();
+  const char* inst_operand = _operand_op_name.iter();
 }
-void  PeepReplace::next_operand( intptr_t &inst_num, const char * &inst_operand ) {
-  const char *inst   = _instruction.iter();
-  inst_num           = (intptr_t)_operand_inst_num.iter();
-  inst_operand       = _operand_op_name.iter();
+void  PeepReplace::next_operand(int &inst_num, const char* &inst_operand) {
+  const char* inst = _instruction.iter();
+  inst_num         = (int) (intptr_t) _operand_inst_num.iter();
+  inst_operand     = _operand_op_name.iter();
 }
 
 
--- a/hotspot/src/share/vm/adlc/formsopt.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/formsopt.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1998-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -457,10 +457,10 @@
 
   // Access info about instructions in the peep-match rule
   int   max_position();
-  const char *instruction_name(intptr_t position);
+  const char *instruction_name(int position);
   // Iterate through all info on matched instructions
   void  reset();
-  void  next_instruction( intptr_t &parent, intptr_t &position, const char * &name, intptr_t &input );
+  void  next_instruction(int &parent, int &position, const char* &name, int &input);
   // 'true' if current position in iteration is a placeholder, not matched.
   bool  is_placeholder();
 
@@ -474,20 +474,20 @@
   PeepConstraint *_next;           // Additional constraints ANDed together
 
 public:
-  const intptr_t  _left_inst;
-  const char      *_left_op;
-  const char      *_relation;
-  const intptr_t  _right_inst;
-  const char      *_right_op;
+  const int   _left_inst;
+  const char* _left_op;
+  const char* _relation;
+  const int   _right_inst;
+  const char* _right_op;
 
 public:
   // Public Methods
-  PeepConstraint(intptr_t  left_inst,  char *left_op, char *relation,
-                 intptr_t  right_inst, char *right_op);
+  PeepConstraint(int left_inst,  char* left_op, char* relation,
+                 int right_inst, char* right_op);
   ~PeepConstraint();
 
   // Check if constraints use instruction at position
-  bool constrains_instruction(intptr_t position);
+  bool constrains_instruction(int position);
 
   // Add another constraint
   void append(PeepConstraint *next_peep_constraint);
@@ -519,7 +519,7 @@
   // Access contents of peepreplace
   void  reset();
   void  next_instruction(const char * &root);
-  void  next_operand( intptr_t &inst_num, const char * &inst_operand );
+  void  next_operand(int &inst_num, const char * &inst_operand );
 
   // Utilities
   void dump();
--- a/hotspot/src/share/vm/adlc/formssel.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/formssel.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -844,8 +844,12 @@
   for (_parameters.reset(); (name = _parameters.iter()) != NULL;) {
     OperandForm *opForm = (OperandForm*)_localNames[name];
 
-    const Form *form = _effects[name];
-    Effect     *e    = form ? form->is_effect() : NULL;
+    Effect* e = NULL;
+    {
+      const Form* form = _effects[name];
+      e = form ? form->is_effect() : NULL;
+    }
+
     if (e != NULL) {
       has_temp |= e->is(Component::TEMP);
 
@@ -858,19 +862,7 @@
         OperandForm* kill = (OperandForm*)_localNames[kill_name];
         globalAD->syntax_err(_linenum, "%s: %s %s must be at the end of the argument list\n",
                              _ident, kill->_ident, kill_name);
-      } else if (e->isa(Component::KILL)) {
-        kill_name = name;
-      }
-
-      // TEMPs are real uses and need to be among the first parameters
-      // listed, otherwise the numbering of operands and inputs gets
-      // screwy, so enforce this restriction during parse.
-      if (kill_name != NULL &&
-          e->isa(Component::TEMP) && !e->isa(Component::DEF)) {
-        OperandForm* kill = (OperandForm*)_localNames[kill_name];
-        globalAD->syntax_err(_linenum, "%s: %s %s must follow %s %s in the argument list\n",
-                             _ident, kill->_ident, kill_name, opForm->_ident, name);
-      } else if (e->isa(Component::KILL)) {
+      } else if (e->isa(Component::KILL) && !e->isa(Component::USE)) {
         kill_name = name;
       }
     }
@@ -1122,7 +1114,7 @@
   const char *op_name            = NULL;
   const char *reg_type           = NULL;
   FormDict   &globals            = AD.globalNames();
-  cisc_spill_operand = _matrule->cisc_spill_match(globals, AD.get_registers(), instr->_matrule, op_name, reg_type);
+  cisc_spill_operand = _matrule->matchrule_cisc_spill_match(globals, AD.get_registers(), instr->_matrule, op_name, reg_type);
   if( (cisc_spill_operand != Not_cisc_spillable) && (op_name != NULL) && equivalent_predicates(this, instr) ) {
     cisc_spill_operand = operand_position(op_name, Component::USE);
     int def_oper  = operand_position(op_name, Component::DEF);
@@ -1217,13 +1209,17 @@
 // Seach through operands to determine parameters unique positions.
 void InstructForm::set_unique_opnds() {
   uint* uniq_idx = NULL;
-  uint  nopnds = num_opnds();
+  int  nopnds = num_opnds();
   uint  num_uniq = nopnds;
-  uint i;
+  int i;
+  _uniq_idx_length = 0;
   if ( nopnds > 0 ) {
-    // Allocate index array with reserve.
-    uniq_idx = (uint*) malloc(sizeof(uint)*(nopnds + 2));
-    for( i = 0; i < nopnds+2; i++ ) {
+    // Allocate index array.  Worst case we're mapping from each
+    // component back to an index and any DEF always goes at 0 so the
+    // length of the array has to be the number of components + 1.
+    _uniq_idx_length = _components.count() + 1;
+    uniq_idx = (uint*) malloc(sizeof(uint)*(_uniq_idx_length));
+    for( i = 0; i < _uniq_idx_length; i++ ) {
       uniq_idx[i] = i;
     }
   }
@@ -1238,8 +1234,8 @@
     _parameters.reset();
     while( (name = _parameters.iter()) != NULL ) {
       count = 0;
-      uint position = 0;
-      uint uniq_position = 0;
+      int position = 0;
+      int uniq_position = 0;
       _components.reset();
       Component *comp = NULL;
       if( sets_result() ) {
@@ -1255,6 +1251,7 @@
         }
         if( strcmp(name, comp->_name)==0 ) {
           if( ++count > 1 ) {
+            assert(position < _uniq_idx_length, "out of bounds");
             uniq_idx[position] = uniq_position;
             has_dupl_use = true;
           } else {
@@ -1284,7 +1281,7 @@
   _num_uniq = num_uniq;
 }
 
-// Generate index values needed for determing the operand position
+// Generate index values needed for determining the operand position
 void InstructForm::index_temps(FILE *fp, FormDict &globals, const char *prefix, const char *receiver) {
   uint  idx = 0;                  // position of operand in match rule
   int   cur_num_opnds = num_opnds();
@@ -2200,8 +2197,8 @@
 // Return zero-based position in component list, only counting constants;
 // Return -1 if not in list.
 int OperandForm::constant_position(FormDict &globals, const Component *last) {
-  // Iterate through components and count constants preceeding 'constant'
-  uint  position = 0;
+  // Iterate through components and count constants preceding 'constant'
+  int position = 0;
   Component *comp;
   _components.reset();
   while( (comp = _components.iter()) != NULL  && (comp != last) ) {
@@ -2238,7 +2235,7 @@
 // Return zero-based position in component list, only counting constants;
 // Return -1 if not in list.
 int OperandForm::register_position(FormDict &globals, const char *reg_name) {
-  // Iterate through components and count registers preceeding 'last'
+  // Iterate through components and count registers preceding 'last'
   uint  position = 0;
   Component *comp;
   _components.reset();
@@ -2304,7 +2301,7 @@
   if ( op->is_base_constant(globals) == Form::idealP ) {
     // Find the constant's index:  _c0, _c1, _c2, ... , _cN
     uint idx  = op->constant_position( globals, rep_var);
-    fprintf(fp,"  virtual bool disp_is_oop() const {", _ident);
+    fprintf(fp,"  virtual bool disp_is_oop() const {");
     fprintf(fp,  "  return _c%d->isa_oop_ptr();", idx);
     fprintf(fp, " }\n");
   }
@@ -3042,9 +3039,9 @@
 
 // Recursive call collecting info on top-level operands, not transitive.
 // Implementation does not modify state of internal structures.
-void MatchNode::append_components(FormDict &locals, ComponentList &components,
-                                  bool deflag) const {
-  int   usedef = deflag ? Component::DEF : Component::USE;
+void MatchNode::append_components(FormDict& locals, ComponentList& components,
+                                  bool def_flag) const {
+  int usedef = def_flag ? Component::DEF : Component::USE;
   FormDict &globals = _AD.globalNames();
 
   assert (_name != NULL, "MatchNode::build_components encountered empty node\n");
@@ -3062,10 +3059,10 @@
     return;
   }
   // Promote results of "Set" to DEF
-  bool def_flag = (!strcmp(_opType, "Set")) ? true : false;
-  if (_lChild) _lChild->append_components(locals, components, def_flag);
-  def_flag = false;   // only applies to component immediately following 'Set'
-  if (_rChild) _rChild->append_components(locals, components, def_flag);
+  bool tmpdef_flag = (!strcmp(_opType, "Set")) ? true : false;
+  if (_lChild) _lChild->append_components(locals, components, tmpdef_flag);
+  tmpdef_flag = false;   // only applies to component immediately following 'Set'
+  if (_rChild) _rChild->append_components(locals, components, tmpdef_flag);
 }
 
 // Find the n'th base-operand in the match node,
@@ -3313,8 +3310,8 @@
   static const char *needs_ideal_memory_list[] = {
     "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" ,
     "StoreB","StoreC","Store" ,"StoreFP",
-    "LoadI" ,"LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
-    "LoadB" ,"LoadUS" ,"LoadS" ,"Load"   ,
+    "LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
+    "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load"   ,
     "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B",
     "Store8B","Store4B","Store8C","Store4C","Store2C",
     "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" ,
@@ -3411,9 +3408,9 @@
   return (form1 == form2);
 }
 
-//-------------------------cisc_spill_match------------------------------------
+//-------------------------cisc_spill_match_node-------------------------------
 // Recursively check two MatchRules for legal conversion via cisc-spilling
-int  MatchNode::cisc_spill_match(FormDict &globals, RegisterForm *registers, MatchNode *mRule2, const char * &operand, const char * &reg_type) {
+int MatchNode::cisc_spill_match(FormDict& globals, RegisterForm* registers, MatchNode* mRule2, const char* &operand, const char* &reg_type) {
   int cisc_spillable  = Maybe_cisc_spillable;
   int left_spillable  = Maybe_cisc_spillable;
   int right_spillable = Maybe_cisc_spillable;
@@ -3434,10 +3431,16 @@
     const InstructForm *form2_inst = form2 ? form2->is_instruction() : NULL;
     const char *name_left  = mRule2->_lChild ? mRule2->_lChild->_opType : NULL;
     const char *name_right = mRule2->_rChild ? mRule2->_rChild->_opType : NULL;
+    DataType data_type = Form::none;
+    if (form->is_operand()) {
+      // Make sure the loadX matches the type of the reg
+      data_type = form->ideal_to_Reg_type(form->is_operand()->ideal_type(globals));
+    }
     // Detect reg vs (loadX memory)
     if( form->is_cisc_reg(globals)
         && form2_inst
-        && (is_load_from_memory(mRule2->_opType) != Form::none) // reg vs. (load memory)
+        && data_type != Form::none
+        && (is_load_from_memory(mRule2->_opType) == data_type) // reg vs. (load memory)
         && (name_left != NULL)       // NOT (load)
         && (name_right == NULL) ) {  // NOT (load memory foo)
       const Form *form2_left = name_left ? globals[name_left] : NULL;
@@ -3487,13 +3490,13 @@
   return cisc_spillable;
 }
 
-//---------------------------cisc_spill_match----------------------------------
+//---------------------------cisc_spill_match_rule------------------------------
 // Recursively check two MatchRules for legal conversion via cisc-spilling
 // This method handles the root of Match tree,
 // general recursive checks done in MatchNode
-int  MatchRule::cisc_spill_match(FormDict &globals, RegisterForm *registers,
-                                 MatchRule *mRule2, const char * &operand,
-                                 const char * &reg_type) {
+int  MatchRule::matchrule_cisc_spill_match(FormDict& globals, RegisterForm* registers,
+                                           MatchRule* mRule2, const char* &operand,
+                                           const char* &reg_type) {
   int cisc_spillable  = Maybe_cisc_spillable;
   int left_spillable  = Maybe_cisc_spillable;
   int right_spillable = Maybe_cisc_spillable;
@@ -3531,7 +3534,7 @@
 //----------------------------- equivalent ------------------------------------
 // Recursively check to see if two match rules are equivalent.
 // This rule handles the root.
-bool MatchRule::equivalent(FormDict &globals, MatchRule *mRule2) {
+bool MatchRule::equivalent(FormDict &globals, MatchNode *mRule2) {
   // Check that each sets a result
   if (sets_result() != mRule2->sets_result()) {
     return false;
@@ -3647,7 +3650,7 @@
 
 //-------------------------- swap_commutative_op ------------------------------
 // Recursively swap specified commutative operation with subtree operands.
-void MatchRule::swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt) {
+void MatchRule::matchrule_swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt) {
   assert(match_rules_cnt < 100," too many match rule clones");
   // Clone
   MatchRule* clone = new MatchRule(_AD, this);
@@ -3660,8 +3663,8 @@
   clone->_next = this->_next;
   this-> _next = clone;
   if( (--count) > 0 ) {
-    this-> swap_commutative_op(instr_ident, count, match_rules_cnt);
-    clone->swap_commutative_op(instr_ident, count, match_rules_cnt);
+    this-> matchrule_swap_commutative_op(instr_ident, count, match_rules_cnt);
+    clone->matchrule_swap_commutative_op(instr_ident, count, match_rules_cnt);
   }
 }
 
@@ -3693,7 +3696,7 @@
 
 // Recursive call collecting info on top-level operands, not transitive.
 // Implementation does not modify state of internal structures.
-void MatchRule::append_components(FormDict &locals, ComponentList &components) const {
+void MatchRule::append_components(FormDict& locals, ComponentList& components, bool def_flag) const {
   assert (_name != NULL, "MatchNode::build_components encountered empty node\n");
 
   MatchNode::append_components(locals, components,
--- a/hotspot/src/share/vm/adlc/formssel.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/formssel.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -101,6 +101,7 @@
   const char    *_ins_pipe;        // Instruction Scheduline description class
 
   uint          *_uniq_idx;        // Indexes of unique operands
+  int            _uniq_idx_length; // Length of _uniq_idx array
   uint           _num_uniq;        // Number  of unique operands
   ComponentList  _components;      // List of Components matches MachNode's
                                    // operand structure
@@ -257,11 +258,13 @@
   void                set_unique_opnds();
   uint                num_unique_opnds() { return _num_uniq; }
   uint                unique_opnds_idx(int idx) {
-                        if( _uniq_idx != NULL && idx > 0 )
+                        if( _uniq_idx != NULL && idx > 0 ) {
+                          assert(idx < _uniq_idx_length, "out of bounds");
                           return _uniq_idx[idx];
-                        else
+                        } else {
                           return idx;
-                      }
+                        }
+  }
 
   // Operands which are only KILLs aren't part of the input array and
   // require special handling in some cases.  Their position in this
@@ -274,7 +277,7 @@
   //
   // Generate the format call for the replacement variable
   void                rep_var_format(FILE *fp, const char *rep_var);
-  // Generate index values needed for determing the operand position
+  // Generate index values needed for determining the operand position
   void                index_temps   (FILE *fp, FormDict &globals, const char *prefix = "", const char *receiver = "");
   // ---------------------------
 
@@ -341,7 +344,7 @@
 
   // --------------------------- Code Block
   // Add code
-  void add_code(const char *string_preceeding_replacement_var);
+  void add_code(const char *string_preceding_replacement_var);
   // Add a replacement variable or one of its subfields
   // Subfields are stored with a leading '$'
   void add_rep_var(char *replacement_var);
@@ -917,8 +920,8 @@
   // return 1 if found and position is incremented by operand offset in rule
   bool       find_name(const char *str, int &position) const;
   bool       find_type(const char *str, int &position) const;
-  void       append_components(FormDict &locals, ComponentList &components,
-                               bool def_flag) const;
+  virtual void append_components(FormDict& locals, ComponentList& components,
+                                 bool def_flag = false) const;
   bool       base_operand(uint &position, FormDict &globals,
                          const char * &result, const char * &name,
                          const char * &opType) const;
@@ -944,12 +947,12 @@
   const char *reduce_left (FormDict &globals)  const;
 
   // Recursive version of check in MatchRule
-  int        cisc_spill_match(FormDict &globals, RegisterForm *registers,
-                              MatchNode *mRule2, const char * &operand,
-                              const char * &reg_type);
+  int        cisc_spill_match(FormDict& globals, RegisterForm* registers,
+                              MatchNode* mRule2, const char* &operand,
+                              const char* &reg_type);
   int        cisc_spill_merge(int left_result, int right_result);
 
-  bool       equivalent(FormDict &globals, MatchNode *mNode2);
+  virtual bool equivalent(FormDict& globals, MatchNode* mNode2);
 
   void       count_commutative_op(int& count);
   void       swap_commutative_op(bool atroot, int count);
@@ -976,7 +979,7 @@
   MatchRule(ArchDesc &ad, MatchNode* mroot, int depth, char* construct, int numleaves);
   ~MatchRule();
 
-  void       append_components(FormDict &locals, ComponentList &components) const;
+  virtual void append_components(FormDict& locals, ComponentList& components, bool def_flag = false) const;
   // Recursive call on all operands' match rules in my match rule.
   bool       base_operand(uint &position, FormDict &globals,
                          const char * &result, const char * &name,
@@ -1003,14 +1006,14 @@
   Form::DataType is_ideal_store() const;// node matches ideal 'StoreXNode'
 
   // Check if 'mRule2' is a cisc-spill variant of this MatchRule
-  int        cisc_spill_match(FormDict &globals, RegisterForm *registers,
-                              MatchRule *mRule2, const char * &operand,
-                              const char * &reg_type);
+  int        matchrule_cisc_spill_match(FormDict &globals, RegisterForm* registers,
+                                        MatchRule* mRule2, const char* &operand,
+                                        const char* &reg_type);
 
   // Check if 'mRule2' is equivalent to this MatchRule
-  bool       equivalent(FormDict &globals, MatchRule *mRule2);
+  virtual bool equivalent(FormDict& globals, MatchNode* mRule2);
 
-  void       swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt);
+  void       matchrule_swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt);
 
   void dump();
   void output(FILE *fp);
--- a/hotspot/src/share/vm/adlc/main.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/main.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -349,7 +349,7 @@
     }
     else {
       if (_ADL_file._name) printf("%s --> ", _ADL_file._name);
-      printf("%s, %s, %s, %s, %s, %s, %s, %s, %s",
+      printf("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s",
              _CPP_file._name,
              _CPP_CLONE_file._name,
              _CPP_EXPAND_file._name,
@@ -358,7 +358,8 @@
              _CPP_MISC_file._name,
              _CPP_PEEPHOLE_file._name,
              _CPP_PIPELINE_file._name,
-             _HPP_file._name, _DFA_file._name);
+             _HPP_file._name,
+             _DFA_file._name);
     }
     printf("\n");
   }
@@ -431,7 +432,7 @@
     legal_end = fbuf.get_line();
   }
   *legal_text = legal_start;
-  return (legal_end - legal_start);
+  return (int) (legal_end - legal_start);
 }
 
 // VS2005 has its own definition, identical to this one.
--- a/hotspot/src/share/vm/adlc/output_c.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/output_c.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -225,11 +225,11 @@
     pipeclass->_parameters.reset();
 
   while ( (paramname = pipeclass->_parameters.iter()) != NULL ) {
-    const PipeClassOperandForm *pipeopnd =
+    const PipeClassOperandForm *tmppipeopnd =
         (const PipeClassOperandForm *)pipeclass->_localUsage[paramname];
 
-    if (pipeopnd)
-      templen += 10 + (int)strlen(pipeopnd->_stage);
+    if (tmppipeopnd)
+      templen += 10 + (int)strlen(tmppipeopnd->_stage);
     else
       templen += 19;
 
@@ -253,10 +253,10 @@
     pipeclass->_parameters.reset();
 
   while ( (paramname = pipeclass->_parameters.iter()) != NULL ) {
-    const PipeClassOperandForm *pipeopnd =
+    const PipeClassOperandForm *tmppipeopnd =
         (const PipeClassOperandForm *)pipeclass->_localUsage[paramname];
     templen += sprintf(&operand_stages[templen], "  stage_%s%c\n",
-      pipeopnd ? pipeopnd->_stage : "undefined",
+      tmppipeopnd ? tmppipeopnd->_stage : "undefined",
       (++i < paramcount ? ',' : ' ') );
   }
 
@@ -1042,10 +1042,10 @@
 
 // Scan the peepmatch and output a test for each instruction
 static void check_peepmatch_instruction_tree(FILE *fp, PeepMatch *pmatch, PeepConstraint *pconstraint) {
-  intptr_t   parent        = -1;
-  intptr_t   inst_position = 0;
-  const char *inst_name    = NULL;
-  intptr_t   input         = 0;
+  int         parent        = -1;
+  int         inst_position = 0;
+  const char* inst_name     = NULL;
+  int         input         = 0;
   fprintf(fp, "      // Check instruction sub-tree\n");
   pmatch->reset();
   for( pmatch->next_instruction( parent, inst_position, inst_name, input );
@@ -1055,14 +1055,14 @@
     if( ! pmatch->is_placeholder() ) {
       // Define temporaries 'inst#', based on parent and parent's input index
       if( parent != -1 ) {                // root was initialized
-        fprintf(fp, "  inst%ld = inst%ld->in(%ld);\n",
+        fprintf(fp, "  inst%d = inst%d->in(%d);\n",
                 inst_position, parent, input);
       }
 
       // When not the root
       // Test we have the correct instruction by comparing the rule
       if( parent != -1 ) {
-        fprintf(fp, "  matches = matches &&  ( inst%ld->rule() == %s_rule );",
+        fprintf(fp, "  matches = matches &&  ( inst%d->rule() == %s_rule );",
                 inst_position, inst_name);
       }
     } else {
@@ -1073,20 +1073,20 @@
   }
 }
 
-static void print_block_index(FILE *fp, intptr_t inst_position) {
+static void print_block_index(FILE *fp, int inst_position) {
   assert( inst_position >= 0, "Instruction number less than zero");
   fprintf(fp, "block_index");
   if( inst_position != 0 ) {
-    fprintf(fp, " - %ld", inst_position);
+    fprintf(fp, " - %d", inst_position);
   }
 }
 
 // Scan the peepmatch and output a test for each instruction
 static void check_peepmatch_instruction_sequence(FILE *fp, PeepMatch *pmatch, PeepConstraint *pconstraint) {
-  intptr_t   parent        = -1;
-  intptr_t   inst_position = 0;
-  const char *inst_name    = NULL;
-  intptr_t   input         = 0;
+  int         parent        = -1;
+  int         inst_position = 0;
+  const char* inst_name     = NULL;
+  int         input         = 0;
   fprintf(fp, "  // Check instruction sub-tree\n");
   pmatch->reset();
   for( pmatch->next_instruction( parent, inst_position, inst_name, input );
@@ -1101,14 +1101,14 @@
         print_block_index(fp, inst_position);
         fprintf(fp, " > 0 ) {\n    Node *n = block->_nodes.at(");
         print_block_index(fp, inst_position);
-        fprintf(fp, ");\n    inst%ld = (n->is_Mach()) ? ", inst_position);
+        fprintf(fp, ");\n    inst%d = (n->is_Mach()) ? ", inst_position);
         fprintf(fp, "n->as_Mach() : NULL;\n  }\n");
       }
 
       // When not the root
       // Test we have the correct instruction by comparing the rule.
       if( parent != -1 ) {
-        fprintf(fp, "  matches = matches && (inst%ld != NULL) && (inst%ld->rule() == %s_rule);\n",
+        fprintf(fp, "  matches = matches && (inst%d != NULL) && (inst%d->rule() == %s_rule);\n",
                 inst_position, inst_position, inst_name);
       }
     } else {
@@ -1121,10 +1121,10 @@
 
 // Build mapping for register indices, num_edges to input
 static void build_instruction_index_mapping( FILE *fp, FormDict &globals, PeepMatch *pmatch ) {
-  intptr_t   parent        = -1;
-  intptr_t   inst_position = 0;
-  const char *inst_name    = NULL;
-  intptr_t   input         = 0;
+  int         parent        = -1;
+  int         inst_position = 0;
+  const char* inst_name     = NULL;
+  int         input         = 0;
   fprintf(fp, "      // Build map to register info\n");
   pmatch->reset();
   for( pmatch->next_instruction( parent, inst_position, inst_name, input );
@@ -1136,9 +1136,9 @@
       InstructForm *inst = globals[inst_name]->is_instruction();
       if( inst != NULL ) {
         char inst_prefix[]  = "instXXXX_";
-        sprintf(inst_prefix, "inst%ld_",   inst_position);
+        sprintf(inst_prefix, "inst%d_",   inst_position);
         char receiver[]     = "instXXXX->";
-        sprintf(receiver,    "inst%ld->", inst_position);
+        sprintf(receiver,    "inst%d->", inst_position);
         inst->index_temps( fp, globals, inst_prefix, receiver );
       }
     }
@@ -1168,7 +1168,7 @@
       }
 
       // LEFT
-      intptr_t left_index  = pconstraint->_left_inst;
+      int left_index       = pconstraint->_left_inst;
       const char *left_op  = pconstraint->_left_op;
       // Access info on the instructions whose operands are compared
       InstructForm *inst_left = globals[pmatch->instruction_name(left_index)]->is_instruction();
@@ -1191,7 +1191,7 @@
 
       // RIGHT
       int right_op_index = -1;
-      intptr_t right_index = pconstraint->_right_inst;
+      int right_index      = pconstraint->_right_inst;
       const char *right_op = pconstraint->_right_op;
       if( right_index != -1 ) { // Match operand
         // Access info on the instructions whose operands are compared
@@ -1351,7 +1351,7 @@
     assert( root_form != NULL, "Replacement instruction was not previously defined");
     fprintf(fp, "        %sNode *root = new (C) %sNode();\n", root_inst, root_inst);
 
-    intptr_t    inst_num;
+    int         inst_num;
     const char *op_name;
     int         opnds_index = 0;            // define result operand
     // Then install the use-operands for the new sub-tree
@@ -1362,7 +1362,6 @@
       InstructForm *inst_form;
       inst_form  = globals[pmatch->instruction_name(inst_num)]->is_instruction();
       assert( inst_form, "Parser should guaranty this is an instruction");
-      int op_base     = inst_form->oper_input_base(globals);
       int inst_op_num = inst_form->operand_position(op_name, Component::USE);
       if( inst_op_num == NameList::Not_in_list )
         inst_op_num = inst_form->operand_position(op_name, Component::USE_DEF);
@@ -1379,32 +1378,32 @@
         // Add unmatched edges from root of match tree
         int op_base = root_form->oper_input_base(globals);
         for( int unmatched_edge = 1; unmatched_edge < op_base; ++unmatched_edge ) {
-          fprintf(fp, "        root->add_req(inst%ld->in(%d));        // unmatched ideal edge\n",
+          fprintf(fp, "        root->add_req(inst%d->in(%d));        // unmatched ideal edge\n",
                                           inst_num, unmatched_edge);
         }
         // If new instruction captures bottom type
         if( root_form->captures_bottom_type() ) {
           // Get bottom type from instruction whose result we are replacing
-          fprintf(fp, "        root->_bottom_type = inst%ld->bottom_type();\n", inst_num);
+          fprintf(fp, "        root->_bottom_type = inst%d->bottom_type();\n", inst_num);
         }
         // Define result register and result operand
-        fprintf(fp, "        ra_->add_reference(root, inst%ld);\n", inst_num);
-        fprintf(fp, "        ra_->set_oop (root, ra_->is_oop(inst%ld));\n", inst_num);
-        fprintf(fp, "        ra_->set_pair(root->_idx, ra_->get_reg_second(inst%ld), ra_->get_reg_first(inst%ld));\n", inst_num, inst_num);
-        fprintf(fp, "        root->_opnds[0] = inst%ld->_opnds[0]->clone(C); // result\n", inst_num);
+        fprintf(fp, "        ra_->add_reference(root, inst%d);\n", inst_num);
+        fprintf(fp, "        ra_->set_oop (root, ra_->is_oop(inst%d));\n", inst_num);
+        fprintf(fp, "        ra_->set_pair(root->_idx, ra_->get_reg_second(inst%d), ra_->get_reg_first(inst%d));\n", inst_num, inst_num);
+        fprintf(fp, "        root->_opnds[0] = inst%d->_opnds[0]->clone(C); // result\n", inst_num);
         fprintf(fp, "        // ----- Done with initial setup -----\n");
       } else {
         if( (op_form == NULL) || (op_form->is_base_constant(globals) == Form::none) ) {
           // Do not have ideal edges for constants after matching
-          fprintf(fp, "        for( unsigned x%d = inst%ld_idx%d; x%d < inst%ld_idx%d; x%d++ )\n",
+          fprintf(fp, "        for( unsigned x%d = inst%d_idx%d; x%d < inst%d_idx%d; x%d++ )\n",
                   inst_op_num, inst_num, inst_op_num,
                   inst_op_num, inst_num, inst_op_num+1, inst_op_num );
-          fprintf(fp, "          root->add_req( inst%ld->in(x%d) );\n",
+          fprintf(fp, "          root->add_req( inst%d->in(x%d) );\n",
                   inst_num, inst_op_num );
         } else {
           fprintf(fp, "        // no ideal edge for constants after matching\n");
         }
-        fprintf(fp, "        root->_opnds[%d] = inst%ld->_opnds[%d]->clone(C);\n",
+        fprintf(fp, "        root->_opnds[%d] = inst%d->_opnds[%d]->clone(C);\n",
                 opnds_index, inst_num, inst_op_num );
       }
       ++opnds_index;
@@ -1443,7 +1442,7 @@
   }
   for( int i = 0; i <= max_position; ++i ) {
     if( i == 0 ) {
-      fprintf(fp, "  MachNode *inst0 = this;\n", i);
+      fprintf(fp, "  MachNode *inst0 = this;\n");
     } else {
       fprintf(fp, "  MachNode *inst%d = NULL;\n", i);
     }
@@ -1743,7 +1742,7 @@
       }
       // delete the rest of edges
       fprintf(fp,"  for(int i = idx%d - 1; i >= (int)idx%d; i--) {\n", cur_num_opnds, new_num_opnds);
-      fprintf(fp,"    del_req(i);\n", i);
+      fprintf(fp,"    del_req(i);\n");
       fprintf(fp,"  }\n");
       fprintf(fp,"  _num_opnds = %d;\n", new_num_opnds);
     }
@@ -1817,7 +1816,7 @@
 
   fprintf(fp,"\n");
   if( node->expands() ) {
-    fprintf(fp,"  return result;\n",cnt-1);
+    fprintf(fp,"  return result;\n");
   } else {
     fprintf(fp,"  return this;\n");
   }
@@ -2140,8 +2139,59 @@
         // A subfield variable, '$$' prefix
         emit_field( rep_var );
       } else {
-        // A replacement variable, '$' prefix
-        emit_rep_var( rep_var );
+        if (_strings_to_emit.peek() != NULL &&
+            strcmp(_strings_to_emit.peek(), "$Address") == 0) {
+          fprintf(_fp, "Address::make_raw(");
+
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->base(ra_,this,idx%d), ", _operand_idx);
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->index(ra_,this,idx%d), ", _operand_idx);
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->scale(), ");
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          Form::DataType stack_type = _operand ? _operand->is_user_name_for_sReg() : Form::none;
+          if( _operand  && _operand_idx==0 && stack_type != Form::none ) {
+            fprintf(_fp,"->disp(ra_,this,0), ");
+          } else {
+            fprintf(_fp,"->disp(ra_,this,idx%d), ", _operand_idx);
+          }
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->disp_is_oop())");
+
+          // skip trailing $Address
+          _strings_to_emit.iter();
+        } else {
+          // A replacement variable, '$' prefix
+          const char* next = _strings_to_emit.peek();
+          const char* next2 = _strings_to_emit.peek(2);
+          if (next != NULL && next2 != NULL && strcmp(next2, "$Register") == 0 &&
+              (strcmp(next, "$base") == 0 || strcmp(next, "$index") == 0)) {
+            // handle $rev_var$$base$$Register and $rev_var$$index$$Register by
+            // producing as_Register(opnd_array(#)->base(ra_,this,idx1)).
+            fprintf(_fp, "as_Register(");
+            // emit the operand reference
+            emit_rep_var( rep_var );
+            rep_var = _strings_to_emit.iter();
+            assert(strcmp(rep_var, "$base") == 0 || strcmp(rep_var, "$index") == 0, "bad pattern");
+            // handle base or index
+            emit_field(rep_var);
+            rep_var = _strings_to_emit.iter();
+            assert(strcmp(rep_var, "$Register") == 0, "bad pattern");
+            // close up the parens
+            fprintf(_fp, ")");
+          } else {
+            emit_rep_var( rep_var );
+          }
+        }
       } // end replacement and/or subfield
     }
   }
--- a/hotspot/src/share/vm/adlc/output_h.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/adlc/output_h.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -574,7 +574,7 @@
   // Generate the user-defined portion of the format
   if( inst._format ) {
     // If there are replacement variables,
-    // Generate index values needed for determing the operand position
+    // Generate index values needed for determining the operand position
     if( inst._format->_rep_vars.count() )
       inst.index_temps(fp, globals);
 
@@ -1832,7 +1832,7 @@
         break;
       case Form::idealP:
       case Form::idealN:
-        fprintf(fp,"    return  opnd_array(1)->type();\n",result);
+        fprintf(fp,"    return  opnd_array(1)->type();\n");
         break;
       case Form::idealD:
         fprintf(fp,"    return  TypeD::make(opnd_array(1)->constantD());\n");
--- a/hotspot/src/share/vm/asm/assembler.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/asm/assembler.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -31,7 +31,7 @@
 // The AbstractAssembler is generating code into a CodeBuffer. To make code generation faster,
 // the assembler keeps a copy of the code buffers boundaries & modifies them when
 // emitting bytes rather than using the code buffers accessor functions all the time.
-// The code buffer is updated via set_code_end(...) after emiting a whole instruction.
+// The code buffer is updated via set_code_end(...) after emitting a whole instruction.
 
 AbstractAssembler::AbstractAssembler(CodeBuffer* code) {
   if (code == NULL)  return;
@@ -239,6 +239,78 @@
   }
 }
 
+struct DelayedConstant {
+  typedef void (*value_fn_t)();
+  BasicType type;
+  intptr_t value;
+  value_fn_t value_fn;
+  // This limit of 20 is generous for initial uses.
+  // The limit needs to be large enough to store the field offsets
+  // into classes which do not have statically fixed layouts.
+  // (Initial use is for method handle object offsets.)
+  // Look for uses of "delayed_value" in the source code
+  // and make sure this number is generous enough to handle all of them.
+  enum { DC_LIMIT = 20 };
+  static DelayedConstant delayed_constants[DC_LIMIT];
+  static DelayedConstant* add(BasicType type, value_fn_t value_fn);
+  bool match(BasicType t, value_fn_t cfn) {
+    return type == t && value_fn == cfn;
+  }
+  static void update_all();
+};
+
+DelayedConstant DelayedConstant::delayed_constants[DC_LIMIT];
+// Default C structure initialization rules have the following effect here:
+// = { { (BasicType)0, (intptr_t)NULL }, ... };
+
+DelayedConstant* DelayedConstant::add(BasicType type,
+                                      DelayedConstant::value_fn_t cfn) {
+  for (int i = 0; i < DC_LIMIT; i++) {
+    DelayedConstant* dcon = &delayed_constants[i];
+    if (dcon->match(type, cfn))
+      return dcon;
+    if (dcon->value_fn == NULL) {
+      // (cmpxchg not because this is multi-threaded but because I'm paranoid)
+      if (Atomic::cmpxchg_ptr(CAST_FROM_FN_PTR(void*, cfn), &dcon->value_fn, NULL) == NULL) {
+        dcon->type = type;
+        return dcon;
+      }
+    }
+  }
+  // If this assert is hit (in pre-integration testing!) then re-evaluate
+  // the comment on the definition of DC_LIMIT.
+  guarantee(false, "too many delayed constants");
+  return NULL;
+}
+
+void DelayedConstant::update_all() {
+  for (int i = 0; i < DC_LIMIT; i++) {
+    DelayedConstant* dcon = &delayed_constants[i];
+    if (dcon->value_fn != NULL && dcon->value == 0) {
+      typedef int     (*int_fn_t)();
+      typedef address (*address_fn_t)();
+      switch (dcon->type) {
+      case T_INT:     dcon->value = (intptr_t) ((int_fn_t)    dcon->value_fn)(); break;
+      case T_ADDRESS: dcon->value = (intptr_t) ((address_fn_t)dcon->value_fn)(); break;
+      }
+    }
+  }
+}
+
+intptr_t* AbstractAssembler::delayed_value_addr(int(*value_fn)()) {
+  DelayedConstant* dcon = DelayedConstant::add(T_INT, (DelayedConstant::value_fn_t) value_fn);
+  return &dcon->value;
+}
+intptr_t* AbstractAssembler::delayed_value_addr(address(*value_fn)()) {
+  DelayedConstant* dcon = DelayedConstant::add(T_ADDRESS, (DelayedConstant::value_fn_t) value_fn);
+  return &dcon->value;
+}
+void AbstractAssembler::update_delayed_values() {
+  DelayedConstant::update_all();
+}
+
+
+
 
 void AbstractAssembler::block_comment(const char* comment) {
   if (sect() == CodeBuffer::SECT_INSTS) {
--- a/hotspot/src/share/vm/asm/assembler.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/asm/assembler.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file contains platform-independant assembler declarations.
+// This file contains platform-independent assembler declarations.
 
 class CodeBuffer;
 class MacroAssembler;
@@ -140,6 +140,28 @@
   }
 };
 
+// A union type for code which has to assemble both constant and
+// non-constant operands, when the distinction cannot be made
+// statically.
+class RegisterConstant VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _r;
+  intptr_t _c;
+
+ public:
+  RegisterConstant(): _r(noreg), _c(0) {}
+  RegisterConstant(Register r): _r(r), _c(0) {}
+  RegisterConstant(intptr_t c): _r(noreg), _c(c) {}
+
+  Register as_register() const { assert(is_register(),""); return _r; }
+  intptr_t as_constant() const { assert(is_constant(),""); return _c; }
+
+  Register register_or_noreg() const { return _r; }
+  intptr_t constant_or_zero() const  { return _c; }
+
+  bool is_register() const { return _r != noreg; }
+  bool is_constant() const { return _r == noreg; }
+};
 
 // The Abstract Assembler: Pure assembler doing NO optimizations on the
 // instruction level; i.e., what you write is what you get.
@@ -280,6 +302,26 @@
   inline address address_constant(Label& L);
   inline address address_table_constant(GrowableArray<Label*> label);
 
+  // Bootstrapping aid to cope with delayed determination of constants.
+  // Returns a static address which will eventually contain the constant.
+  // The value zero (NULL) stands instead of a constant which is still uncomputed.
+  // Thus, the eventual value of the constant must not be zero.
+  // This is fine, since this is designed for embedding object field
+  // offsets in code which must be generated before the object class is loaded.
+  // Field offsets are never zero, since an object's header (mark word)
+  // is located at offset zero.
+  RegisterConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0) {
+    return delayed_value(delayed_value_addr(value_fn), tmp, offset);
+  }
+  RegisterConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0) {
+    return delayed_value(delayed_value_addr(value_fn), tmp, offset);
+  }
+  virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset) = 0;
+  // Last overloading is platform-dependent; look in assembler_<arch>.cpp.
+  static intptr_t* delayed_value_addr(int(*constant_fn)());
+  static intptr_t* delayed_value_addr(address(*constant_fn)());
+  static void update_delayed_values();
+
   // Bang stack to trigger StackOverflowError at a safe location
   // implementation delegates to machine-specific bang_stack_with_offset
   void generate_stack_overflow_check( int frame_size_in_bytes );
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -541,7 +541,7 @@
     // is report a value that will meet correctly with any downstream
     // reference types on paths that will truly be executed.  This null type
     // meets with any reference type to yield that same reference type.
-    // (The compiler will generate an unconditonal exception here.)
+    // (The compiler will generate an unconditional exception here.)
     push(null_type());
     return;
   }
--- a/hotspot/src/share/vm/classfile/symbolTable.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/classfile/symbolTable.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -156,7 +156,7 @@
 
   symbolOop test = lookup(index, (char*)name, len, hashValue);
   if (test != NULL) {
-    // A race occured and another thread introduced the symbol, this one
+    // A race occurred and another thread introduced the symbol, this one
     // will be dropped and collected.
     return test;
   }
@@ -193,7 +193,7 @@
     int index = hash_to_index(hashValues[i]);
     symbolOop test = lookup(index, names[i], lengths[i], hashValues[i]);
     if (test != NULL) {
-      // A race occured and another thread introduced the symbol, this one
+      // A race occurred and another thread introduced the symbol, this one
       // will be dropped and collected. Use test instead.
       cp->symbol_at_put(cp_indices[i], test);
     } else {
--- a/hotspot/src/share/vm/code/nmethod.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -380,7 +380,7 @@
 void nmethod::add_handler_for_exception_and_pc(Handle exception, address pc, address handler) {
   // There are potential race conditions during exception cache updates, so we
   // must own the ExceptionCache_lock before doing ANY modifications. Because
-  // we dont lock during reads, it is possible to have several threads attempt
+  // we don't lock during reads, it is possible to have several threads attempt
   // to update the cache with the same data. We need to check for already inserted
   // copies of the current data before adding it.
 
--- a/hotspot/src/share/vm/code/nmethod.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -167,7 +167,7 @@
   nmFlags flags;           // various flags to keep track of nmethod state
   bool _markedForDeoptimization;       // Used for stack deoptimization
   enum { alive        = 0,
-         not_entrant  = 1, // uncommon trap has happend but activations may still exist
+         not_entrant  = 1, // uncommon trap has happened but activations may still exist
          zombie       = 2,
          unloaded     = 3 };
 
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -393,7 +393,7 @@
    // Restarts the concurrent phases timer.
    void concurrent_phases_resume();
 
-   // Time begining and end of the marking phase for
+   // Time beginning and end of the marking phase for
    // a synchronous MS collection.  A MS collection
    // that finishes in the foreground can have started
    // in the background.  These methods capture the
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsGCAdaptivePolicyCounters.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsGCAdaptivePolicyCounters.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -69,7 +69,7 @@
   // end of the sweep of the tenured generation.
   PerfVariable* _avg_cms_free_counter;
   // Average of the free space in the tenured generation at the
-  // start of the sweep of the tenured genertion.
+  // start of the sweep of the tenured generation.
   PerfVariable* _avg_cms_free_at_sweep_counter;
   // Average of the free space in the tenured generation at the
   // after any resizing of the tenured generation at the end
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -4178,7 +4178,7 @@
     // and is deferred for now; see CR# TBF. 07252005YSR. XXX
     assert(!CMSAbortSemantics || tsk.aborted(), "Inconsistency");
     // If _restart_addr is non-NULL, a marking stack overflow
-    // occured; we need to do a fresh marking iteration from the
+    // occurred; we need to do a fresh marking iteration from the
     // indicated restart address.
     if (_foregroundGCIsActive && asynch) {
       // We may be running into repeated stack overflows, having
@@ -4221,7 +4221,7 @@
   // should be incremental with periodic yields.
   _markBitMap.iterate(&markFromRootsClosure);
   // If _restart_addr is non-NULL, a marking stack overflow
-  // occured; we need to do a fresh iteration from the
+  // occurred; we need to do a fresh iteration from the
   // indicated restart address.
   while (_restart_addr != NULL) {
     if (_foregroundGCIsActive && asynch) {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -133,14 +133,12 @@
       _co_tracker.update(false);
 
       if (G1SmoothConcRefine) {
-        start_vtime_sec = os::elapsedVTime();
         prev_buffer_num = curr_buffer_num;
-
         _sts.leave();
         os::sleep(Thread::current(), (jlong) _interval_ms, false);
         _sts.join();
+        start_vtime_sec = os::elapsedVTime();
       }
-
       n_logs++;
     }
     // Make sure we harvest the PYA, if any.
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -420,6 +420,10 @@
 
   _has_overflown(false),
   _concurrent(false),
+  _has_aborted(false),
+  _restart_for_overflow(false),
+  _concurrent_marking_in_progress(false),
+  _should_gray_objects(false),
 
   // _verbose_level set below
 
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -107,7 +107,7 @@
       if (PrintGC) {
         gclog_or_tty->date_stamp(PrintGCDateStamps);
         gclog_or_tty->stamp(PrintGCTimeStamps);
-        tty->print_cr("[GC concurrent-mark-start]");
+        gclog_or_tty->print_cr("[GC concurrent-mark-start]");
       }
 
       if (!g1_policy->in_young_gc_mode()) {
@@ -320,8 +320,6 @@
   set_in_progress();
   clear_started();
   if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
-
-  return;
 }
 
 // Note: this method, although exported by the ConcurrentMarkSweepThread,
--- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -78,8 +78,8 @@
 
 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                    int max_completed_queue,
-                                   Mutex* lock) {
-  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+                                   Mutex* lock, PtrQueueSet* fl_owner) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner);
   set_buffer_size(DCQBarrierQueueBufferSize);
   set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
 
--- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -88,7 +88,7 @@
 
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
                   int max_completed_queue = 0,
-                  Mutex* lock = NULL);
+                  Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL);
 
   // The number of parallel ids that can be claimed to allow collector or
   // mutator threads to do card-processing work.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -136,6 +136,14 @@
   int calls() { return _calls; }
 };
 
+class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
+public:
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    *card_ptr = CardTableModRefBS::dirty_card_val();
+    return true;
+  }
+};
+
 YoungList::YoungList(G1CollectedHeap* g1h)
   : _g1h(g1h), _head(NULL),
     _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL),
@@ -812,6 +820,40 @@
   }
 };
 
+class RebuildRSOutOfRegionClosure: public HeapRegionClosure {
+  G1CollectedHeap*   _g1h;
+  UpdateRSOopClosure _cl;
+  int                _worker_i;
+public:
+  RebuildRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
+    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
+    _worker_i(worker_i),
+    _g1h(g1)
+  { }
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      _cl.set_from(r);
+      r->oop_iterate(&_cl);
+    }
+    return false;
+  }
+};
+
+class ParRebuildRSTask: public AbstractGangTask {
+  G1CollectedHeap* _g1;
+public:
+  ParRebuildRSTask(G1CollectedHeap* g1)
+    : AbstractGangTask("ParRebuildRSTask"),
+      _g1(g1)
+  { }
+
+  void work(int i) {
+    RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
+    _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+                                         HeapRegion::RebuildRSClaimValue);
+  }
+};
+
 void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
                                     size_t word_size) {
   ResourceMark rm;
@@ -918,24 +960,35 @@
 
     reset_gc_time_stamp();
     // Since everything potentially moved, we will clear all remembered
-    // sets, and clear all cards.  Later we will also cards in the used
-    // portion of the heap after the resizing (which could be a shrinking.)
-    // We will also reset the GC time stamps of the regions.
+    // sets, and clear all cards.  Later we will rebuild remebered
+    // sets. We will also reset the GC time stamps of the regions.
     PostMCRemSetClearClosure rs_clear(mr_bs());
     heap_region_iterate(&rs_clear);
 
     // Resize the heap if necessary.
     resize_if_necessary_after_full_collection(full ? 0 : word_size);
 
-    // Since everything potentially moved, we will clear all remembered
-    // sets, but also dirty all cards corresponding to used regions.
-    PostMCRemSetInvalidateClosure rs_invalidate(mr_bs());
-    heap_region_iterate(&rs_invalidate);
     if (_cg1r->use_cache()) {
       _cg1r->clear_and_record_card_counts();
       _cg1r->clear_hot_cache();
     }
 
+    // Rebuild remembered sets of all regions.
+    if (ParallelGCThreads > 0) {
+      ParRebuildRSTask rebuild_rs_task(this);
+      assert(check_heap_region_claim_values(
+             HeapRegion::InitialClaimValue), "sanity check");
+      set_par_threads(workers()->total_workers());
+      workers()->run_task(&rebuild_rs_task);
+      set_par_threads(0);
+      assert(check_heap_region_claim_values(
+             HeapRegion::RebuildRSClaimValue), "sanity check");
+      reset_heap_region_claim_values();
+    } else {
+      RebuildRSOutOfRegionClosure rebuild_rs(this);
+      heap_region_iterate(&rebuild_rs);
+    }
+
     if (PrintGC) {
       print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity());
     }
@@ -961,7 +1014,8 @@
     // dirtied, so this should abandon those logs, and set "do_traversal"
     // to true.
     concurrent_g1_refine()->set_pya_restart();
-
+    assert(!G1DeferredRSUpdate
+           || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any");
     assert(regions_accounted_for(), "Region leakage!");
   }
 
@@ -1466,6 +1520,13 @@
                                                   G1DirtyCardQueueMax,
                                                   Shared_DirtyCardQ_lock);
   }
+  if (G1DeferredRSUpdate) {
+    dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                      DirtyCardQ_FL_lock,
+                                      0,
+                                      Shared_DirtyCardQ_lock,
+                                      &JavaThread::dirty_card_queue_set());
+  }
   // In case we're keeping closure specialization stats, initialize those
   // counts and that mechanism.
   SpecializationStats::clear();
@@ -2316,7 +2377,6 @@
 void
 G1CollectedHeap::checkConcurrentMark() {
     VerifyMarkedObjsClosure verifycl(this);
-    doConcurrentMark();
     //    MutexLockerEx x(getMarkBitMapLock(),
     //              Mutex::_no_safepoint_check_flag);
     object_iterate(&verifycl);
@@ -2493,7 +2553,7 @@
 
     guarantee(_in_cset_fast_test == NULL, "invariant");
     guarantee(_in_cset_fast_test_base == NULL, "invariant");
-    _in_cset_fast_test_length = n_regions();
+    _in_cset_fast_test_length = max_regions();
     _in_cset_fast_test_base =
                              NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
     memset(_in_cset_fast_test_base, false,
@@ -2513,7 +2573,7 @@
     }
     save_marks();
 
-    // We must do this before any possible evacuation that should propogate
+    // We must do this before any possible evacuation that should propagate
     // marks, including evacuation of popular objects in a popular pause.
     if (mark_in_progress()) {
       double start_time_sec = os::elapsedTime();
@@ -2626,9 +2686,8 @@
 #endif // SCAN_ONLY_VERBOSE
 
     double end_time_sec = os::elapsedTime();
-    if (!evacuation_failed()) {
-      g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
-    }
+    double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
+    g1_policy()->record_pause_time_ms(pause_time_ms);
     GCOverheadReporter::recordSTWEnd(end_time_sec);
     g1_policy()->record_collection_pause_end(popular_region != NULL,
                                              abandoned);
@@ -2919,27 +2978,51 @@
   }
 };
 
-class RecreateRSetEntriesClosure: public OopClosure {
+class UpdateRSetImmediate : public OopsInHeapRegionClosure {
 private:
   G1CollectedHeap* _g1;
   G1RemSet* _g1_rem_set;
-  HeapRegion* _from;
 public:
-  RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) :
-    _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from)
-  {}
+  UpdateRSetImmediate(G1CollectedHeap* g1) :
+    _g1(g1), _g1_rem_set(g1->g1_rem_set()) {}
 
   void do_oop(narrowOop* p) {
     guarantee(false, "NYI");
   }
   void do_oop(oop* p) {
     assert(_from->is_in_reserved(p), "paranoia");
-    if (*p != NULL) {
-      _g1_rem_set->write_ref(_from, p);
+    if (*p != NULL && !_from->is_survivor()) {
+      _g1_rem_set->par_write_ref(_from, p, 0);
     }
   }
 };
 
+class UpdateRSetDeferred : public OopsInHeapRegionClosure {
+private:
+  G1CollectedHeap* _g1;
+  DirtyCardQueue *_dcq;
+  CardTableModRefBS* _ct_bs;
+
+public:
+  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (!_from->is_in_reserved(*p) && !_from->is_survivor()) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+
+
 class RemoveSelfPointerClosure: public ObjectClosure {
 private:
   G1CollectedHeap* _g1;
@@ -2947,11 +3030,11 @@
   HeapRegion* _hr;
   size_t _prev_marked_bytes;
   size_t _next_marked_bytes;
+  OopsInHeapRegionClosure *_cl;
 public:
-  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) :
-    _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr),
-    _prev_marked_bytes(0), _next_marked_bytes(0)
-  {}
+  RemoveSelfPointerClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* cl) :
+    _g1(g1), _cm(_g1->concurrent_mark()),  _prev_marked_bytes(0),
+    _next_marked_bytes(0), _cl(cl) {}
 
   size_t prev_marked_bytes() { return _prev_marked_bytes; }
   size_t next_marked_bytes() { return _next_marked_bytes; }
@@ -2989,8 +3072,7 @@
       // that, if evacuation fails, we might have remembered set
       // entries missing given that we skipped cards on the
       // collection set. So, we'll recreate such entries now.
-      RecreateRSetEntriesClosure cl(_g1, _hr);
-      obj->oop_iterate(&cl);
+      obj->oop_iterate(_cl);
       assert(_cm->isPrevMarked(obj), "Should be marked!");
     } else {
       // The object has been either evacuated or is dead. Fill it with a
@@ -3003,14 +3085,23 @@
 };
 
 void G1CollectedHeap::remove_self_forwarding_pointers() {
+  UpdateRSetImmediate immediate_update(_g1h);
+  DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
+  UpdateRSetDeferred deferred_update(_g1h, &dcq);
+  OopsInHeapRegionClosure *cl;
+  if (G1DeferredRSUpdate) {
+    cl = &deferred_update;
+  } else {
+    cl = &immediate_update;
+  }
   HeapRegion* cur = g1_policy()->collection_set();
-
   while (cur != NULL) {
     assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
+    RemoveSelfPointerClosure rspc(_g1h, cl);
     if (cur->evacuation_failed()) {
-      RemoveSelfPointerClosure rspc(_g1h, cur);
       assert(cur->in_collection_set(), "bad CS");
+      cl->set_region(cur);
       cur->object_iterate(&rspc);
 
       // A number of manipulations to make the TAMS be the current top,
@@ -3519,6 +3610,9 @@
 protected:
   G1CollectedHeap* _g1h;
   RefToScanQueue*  _refs;
+  DirtyCardQueue   _dcq;
+  CardTableModRefBS* _ct_bs;
+  G1RemSet* _g1_rem;
 
   typedef GrowableArray<oop*> OverflowQueue;
   OverflowQueue* _overflowed_refs;
@@ -3560,10 +3654,32 @@
 
   void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
 
+  DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
+  CardTableModRefBS* ctbs()                      { return _ct_bs; }
+
+  void immediate_rs_update(HeapRegion* from, oop* p, int tid) {
+    _g1_rem->par_write_ref(from, p, tid);
+  }
+
+  void deferred_rs_update(HeapRegion* from, oop* p, int tid) {
+    // If the new value of the field points to the same region or
+    // is the to-space, we don't need to include it in the Rset updates.
+    if (!from->is_in_reserved(*p) && !from->is_survivor()) {
+      size_t card_index = ctbs()->index_for(p);
+      // If the card hasn't been added to the buffer, do it.
+      if (ctbs()->mark_card_deferred(card_index)) {
+        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
+      }
+    }
+  }
+
 public:
   G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
     : _g1h(g1h),
       _refs(g1h->task_queue(queue_num)),
+      _dcq(&g1h->dirty_card_queue_set()),
+      _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
+      _g1_rem(g1h->g1_rem_set()),
       _hash_seed(17), _queue_num(queue_num),
       _term_attempts(0),
       _age_table(false),
@@ -3641,6 +3757,14 @@
   int refs_to_scan()                             { return refs()->size();                 }
   int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
 
+  void update_rs(HeapRegion* from, oop* p, int tid) {
+    if (G1DeferredRSUpdate) {
+      deferred_rs_update(from, p, tid);
+    } else {
+      immediate_rs_update(from, p, tid);
+    }
+  }
+
   HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
 
     HeapWord* obj = NULL;
@@ -3809,7 +3933,6 @@
   }
 };
 
-
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state) { }
@@ -3835,7 +3958,7 @@
       assert(obj == *p, "the value of *p should not have changed");
       _par_scan_state->push_on_queue(p);
     } else {
-      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     }
   }
 }
@@ -3973,13 +4096,13 @@
     }
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
-      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     }
   }
 
   // When scanning moved objs, must look at all oops.
   if (barrier == G1BarrierEvac && obj != NULL) {
-    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
   }
 
   if (do_gen_barrier && obj != NULL) {
@@ -4128,6 +4251,7 @@
     G1ParScanExtRootClosure         only_scan_root_cl(_g1h, &pss);
     G1ParScanPermClosure            only_scan_perm_cl(_g1h, &pss);
     G1ParScanHeapRSClosure          only_scan_heap_rs_cl(_g1h, &pss);
+
     G1ParScanAndMarkExtRootClosure  scan_mark_root_cl(_g1h, &pss);
     G1ParScanAndMarkPermClosure     scan_mark_perm_cl(_g1h, &pss);
     G1ParScanAndMarkHeapRSClosure   scan_mark_heap_rs_cl(_g1h, &pss);
@@ -4383,7 +4507,6 @@
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
-
   set_par_threads(n_workers);
   G1ParTask g1_par_task(this, n_workers, _task_queues);
 
@@ -4391,8 +4514,9 @@
 
   change_strong_roots_parity();  // In preparation for parallel strong roots.
   rem_set()->prepare_for_younger_refs_iterate(true);
+
+  assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
   double start_par = os::elapsedTime();
-
   if (ParallelGCThreads > 0) {
     // The individual threads will set their evac-failure closures.
     workers()->run_task(&g1_par_task);
@@ -4412,8 +4536,8 @@
     G1KeepAliveClosure keep_alive(this);
     JNIHandles::weak_oops_do(&is_alive, &keep_alive);
   }
-
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
+
   concurrent_g1_refine()->set_use_cache(true);
 
   finalize_for_evac_failure();
@@ -4424,7 +4548,6 @@
 
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
-
     if (PrintGCDetails) {
       gclog_or_tty->print(" (evacuation failed)");
     } else if (PrintGC) {
@@ -4432,6 +4555,14 @@
     }
   }
 
+  if (G1DeferredRSUpdate) {
+    RedirtyLoggedCardTableEntryFastClosure redirty;
+    dirty_card_queue_set().set_closure(&redirty);
+    dirty_card_queue_set().apply_closure_to_all_completed_buffers();
+    JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set());
+    assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+  }
+
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -457,6 +457,10 @@
   // And it's mod ref barrier set, used to track updates for the above.
   ModRefBarrierSet* _mr_bs;
 
+  // A set of cards that cover the objects for which the Rsets should be updated
+  // concurrently after the collection.
+  DirtyCardQueueSet _dirty_card_queue_set;
+
   // The Heap Region Rem Set Iterator.
   HeapRegionRemSetIterator** _rem_set_iterator;
 
@@ -666,6 +670,9 @@
 
   RefToScanQueue *task_queue(int i);
 
+  // A set of cards where updates happened during the GC
+  DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
+
   // Create a G1CollectedHeap with the specified policy.
   // Must call the initialize method afterwards.
   // May not return if something goes wrong.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1014,7 +1014,7 @@
 
   _all_full_gc_times_ms->add(full_gc_time_ms);
 
-  update_recent_gc_times(end_sec, full_gc_time_sec);
+  update_recent_gc_times(end_sec, full_gc_time_ms);
 
   _g1->clear_full_collection();
 
@@ -1475,6 +1475,7 @@
   size_t cur_used_bytes = _g1->used();
   assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
   bool last_pause_included_initial_mark = false;
+  bool update_stats = !abandoned && !_g1->evacuation_failed();
 
 #ifndef PRODUCT
   if (G1YoungSurvRateVerbose) {
@@ -1535,7 +1536,7 @@
 
   _n_pauses++;
 
-  if (!abandoned) {
+  if (update_stats) {
     _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms);
     _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms);
     _recent_evac_times_ms->add(evac_ms);
@@ -1636,7 +1637,7 @@
   double termination_time = avg_value(_par_last_termination_times_ms);
 
   double parallel_other_time;
-  if (!abandoned) {
+  if (update_stats) {
     MainBodySummary* body_summary = summary->main_body_summary();
     guarantee(body_summary != NULL, "should not be null!");
 
@@ -1852,7 +1853,7 @@
 
   // <NEW PREDICTION>
 
-  if (!popular && !abandoned) {
+  if (!popular && update_stats) {
     double pause_time_ms = elapsed_ms;
 
     size_t diff = 0;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -966,7 +966,7 @@
     record_termination_time(0, ms);
   }
 
-  void record_pause_time(double ms) {
+  void record_pause_time_ms(double ms) {
     _last_pause_time_ms = ms;
   }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -105,33 +105,6 @@
   _g1->heap_region_iterate(&rc);
 }
 
-class UpdateRSOopClosure: public OopClosure {
-  HeapRegion* _from;
-  HRInto_G1RemSet* _rs;
-  int _worker_i;
-public:
-  UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
-    _from(NULL), _rs(rs), _worker_i(worker_i) {
-    guarantee(_rs != NULL, "Requires an HRIntoG1RemSet");
-  }
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    assert(_from != NULL, "from region must be non-NULL");
-    _rs->par_write_ref(_from, p, _worker_i);
-  }
-  // Override: this closure is idempotent.
-  //  bool idempotent() { return true; }
-  bool apply_to_weak_ref_discovered_field() { return true; }
-};
-
 class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
   G1CollectedHeap*    _g1h;
   ModRefBarrierSet*   _mr_bs;
@@ -177,11 +150,19 @@
     _cards_scanned(NULL), _total_cards_scanned(0)
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
-  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, ParallelGCThreads);
+  guarantee(n_workers() > 0, "There should be some workers");
+  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, n_workers());
+  for (uint i = 0; i < n_workers(); i++) {
+    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192,true);
+  }
 }
 
 HRInto_G1RemSet::~HRInto_G1RemSet() {
   delete _seq_task;
+  for (uint i = 0; i < n_workers(); i++) {
+    delete _new_refs[i];
+  }
+  FREE_C_HEAP_ARRAY(GrowableArray<oop*>*, _new_refs);
 }
 
 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -281,8 +262,9 @@
         if (!_ct_bs->is_card_claimed(card_index) &&
             !_ct_bs->is_card_dirty(card_index)) {
           assert(_ct_bs->is_card_clean(card_index) ||
-                 _ct_bs->is_card_claimed(card_index),
-                 "Card is either dirty, clean, or claimed");
+                 _ct_bs->is_card_claimed(card_index) ||
+                 _ct_bs->is_card_deferred(card_index),
+                 "Card is either clean, claimed or deferred");
           if (_ct_bs->claim_card(card_index))
             scanCard(card_index, card_region);
         }
@@ -338,14 +320,12 @@
 
   _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
   _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
-  if (ParallelGCThreads > 0) {
-    // In this case, we called scanNewRefsRS and recorded the corresponding
-    // time.
-    double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
-    if (scan_new_refs_time_ms > 0.0) {
-      closure_app_time_ms += scan_new_refs_time_ms;
-    }
+
+  double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
+  if (scan_new_refs_time_ms > 0.0) {
+    closure_app_time_ms += scan_new_refs_time_ms;
   }
+
   _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
 }
 
@@ -469,8 +449,8 @@
   double scan_new_refs_start_sec = os::elapsedTime();
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
-  while (_new_refs[worker_i]->is_nonempty()) {
-    oop* p = _new_refs[worker_i]->pop();
+  for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
+    oop* p = _new_refs[worker_i]->at(i);
     oop obj = *p;
     // *p was in the collection set when p was pushed on "_new_refs", but
     // another thread may have processed this location from an RS, so it
@@ -480,10 +460,6 @@
       HeapRegion* r = g1h->heap_region_containing(p);
 
       DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
-      assert(ParallelGCThreads > 1
-             || to->rem_set()->contains_reference(p),
-             "Invariant: pushed after being added."
-             "(Not reliable in parallel code.)");
       oc->set_region(r);
       // If "p" has already been processed concurrently, this is
       // idempotent.
@@ -538,8 +514,8 @@
     }
   } else {
     assert(worker_i == 0, "invariant");
-
     updateRS(0);
+    scanNewRefsRS(oc, 0);
     scanRS(oc, 0);
   }
 }
@@ -559,11 +535,7 @@
   assert(!_par_traversal_in_progress, "Invariant between iterations.");
   if (ParallelGCThreads > 0) {
     set_par_traversal(true);
-    int n_workers = _g1->workers()->total_workers();
-    _seq_task->set_par_threads(n_workers);
-    for (uint i = 0; i < ParallelGCThreads; i++)
-      _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192,true);
-
+    _seq_task->set_par_threads((int)n_workers());
     if (cg1r->do_traversal()) {
       updateRS(0);
       // Have to do this again after updaters
@@ -587,6 +559,53 @@
   }
 };
 
+class UpdateRSetOopsIntoCSImmediate : public OopClosure {
+  G1CollectedHeap* _g1;
+public:
+  UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    HeapRegion* to = _g1->heap_region_containing(*p);
+    if (to->in_collection_set()) {
+      if (to->rem_set()->add_reference(p, 0)) {
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+  }
+};
+
+class UpdateRSetOopsIntoCSDeferred : public OopClosure {
+  G1CollectedHeap* _g1;
+  CardTableModRefBS* _ct_bs;
+  DirtyCardQueue* _dcq;
+public:
+  UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    oop obj = *p;
+    if (_g1->obj_in_cs(obj)) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) {
+  for (size_t i = 0; i < n_workers(); i++) {
+    for (int j = 0; j < _new_refs[i]->length(); j++) {
+      oop* p = _new_refs[i]->at(j);
+      cl->do_oop(p);
+    }
+  }
+}
+
 void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
   guarantee( _cards_scanned != NULL, "invariant" );
   _total_cards_scanned = 0;
@@ -609,11 +628,25 @@
     if (cg1r->do_traversal()) {
       cg1r->cg1rThread()->set_do_traversal(false);
     }
-    for (uint i = 0; i < ParallelGCThreads; i++) {
-      delete _new_refs[i];
-    }
     set_par_traversal(false);
   }
+
+  if (_g1->evacuation_failed()) {
+    // Restore remembered sets for the regions pointing into
+    // the collection set.
+    if (G1DeferredRSUpdate) {
+      DirtyCardQueue dcq(&_g1->dirty_card_queue_set());
+      UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq);
+      new_refs_iterate(&deferred_update);
+    } else {
+      UpdateRSetOopsIntoCSImmediate immediate_update(_g1);
+      new_refs_iterate(&immediate_update);
+    }
+  }
+  for (uint i = 0; i < n_workers(); i++) {
+    _new_refs[i]->clear();
+  }
+
   assert(!_par_traversal_in_progress, "Invariant between iterations.");
 }
 
@@ -683,7 +716,8 @@
   bool doHeapRegion(HeapRegion* r) {
     if (!r->in_collection_set() &&
         !r->continuesHumongous() &&
-        !r->is_young()) {
+        !r->is_young() &&
+        !r->is_survivor()) {
       _update_rs_oop_cl.set_from(r);
       UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
 
@@ -820,7 +854,7 @@
   // before all the cards on the region are dirtied. This is unlikely,
   // and it doesn't happen often, but it can happen. So, the extra
   // check below filters out those cards.
-  if (r->is_young()) {
+  if (r->is_young() || r->is_survivor()) {
     return;
   }
   // While we are processing RSet buffers during the collection, we
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -155,6 +155,7 @@
   bool _par_traversal_in_progress;
   void set_par_traversal(bool b);
   GrowableArray<oop*>** _new_refs;
+  void new_refs_iterate(OopClosure* cl);
 
 public:
   // This is called to reset dual hash tables after the gc pause
@@ -214,3 +215,27 @@
   int n() { return _n; };
   HeapWord* start_first() { return _start_first; }
 };
+
+class UpdateRSOopClosure: public OopClosure {
+  HeapRegion* _from;
+  HRInto_G1RemSet* _rs;
+  int _worker_i;
+public:
+  UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
+    _from(NULL), _rs(rs), _worker_i(worker_i) {
+    guarantee(_rs != NULL, "Requires an HRIntoG1RemSet");
+  }
+
+  void set_from(HeapRegion* from) {
+    assert(from != NULL, "from region must be non-NULL");
+    _from = from;
+  }
+
+  virtual void do_oop(narrowOop* p);
+  virtual void do_oop(oop* p);
+
+  // Override: this closure is idempotent.
+  //  bool idempotent() { return true; }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+};
+
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -31,24 +31,7 @@
 }
 
 inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {
-  oop obj = *p;
-  assert(from != NULL && from->is_in_reserved(p),
-         "p is not in a from");
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (from != to && to != NULL) {
-    if (!to->popular() && !from->is_survivor()) {
-#if G1_REM_SET_LOGGING
-      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
-                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
-                             p, obj,
-                             to->bottom(), to->end());
-#endif
-      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
-      if (to->rem_set()->add_reference(p)) {
-        _g1->schedule_popular_region_evac(to);
-      }
-    }
-  }
+  par_write_ref(from, p, 0);
 }
 
 inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {
@@ -82,7 +65,22 @@
   HeapRegion* to = _g1->heap_region_containing(obj);
   // The test below could be optimized by applying a bit op to to and from.
   if (to != NULL && from != NULL && from != to) {
-    if (!to->popular() && !from->is_survivor()) {
+    bool update_delayed = false;
+    // There is a tricky infinite loop if we keep pushing
+    // self forwarding pointers onto our _new_refs list.
+    // The _par_traversal_in_progress flag is true during the collection pause,
+    // false during the evacuation failure handing.
+    if (_par_traversal_in_progress &&
+        to->in_collection_set() && !self_forwarded(obj)) {
+      _new_refs[tid]->push(p);
+      // Deferred updates to the Cset are either discarded (in the normal case),
+      // or processed (if an evacuation failure occurs) at the end
+      // of the collection.
+      // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do().
+      update_delayed = true;
+    }
+
+    if (!to->popular() && !update_delayed) {
 #if G1_REM_SET_LOGGING
       gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
                              " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
@@ -94,11 +92,14 @@
         _g1->schedule_popular_region_evac(to);
       }
     }
-    // There is a tricky infinite loop if we keep pushing
-    // self forwarding pointers onto our _new_refs list.
-    if (_par_traversal_in_progress &&
-        to->in_collection_set() && !self_forwarded(obj)) {
-      _new_refs[tid]->push(p);
-    }
   }
 }
+
+inline void UpdateRSOopClosure::do_oop(narrowOop* p) {
+  guarantee(false, "NYI");
+}
+
+inline void UpdateRSOopClosure::do_oop(oop* p) {
+  assert(_from != NULL, "from region must be non-NULL");
+  _rs->par_write_ref(_from, p, _worker_i);
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -172,6 +172,9 @@
   develop(bool, G1RSBarrierUseQueue, true,                                  \
           "If true, use queueing RS barrier")                               \
                                                                             \
+  develop(bool, G1DeferredRSUpdate, true,                                   \
+          "If true, use deferred RS updates")                               \
+                                                                            \
   develop(bool, G1RSLogCheckCardTable, false,                               \
           "If true, verify that no dirty cards remain after RS log "        \
           "processing.")                                                    \
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -318,7 +318,8 @@
     FinalCountClaimValue  = 1,
     NoteEndClaimValue     = 2,
     ScrubRemSetClaimValue = 3,
-    ParVerifyClaimValue   = 4
+    ParVerifyClaimValue   = 4,
+    RebuildRSClaimValue   = 5
   };
 
   // Concurrent refinement requires contiguous heap regions (in which TLABs
--- a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -91,15 +91,17 @@
   _n_completed_buffers(0),
   _process_completed_threshold(0), _process_completed(false),
   _buf_free_list(NULL), _buf_free_list_sz(0)
-{}
+{
+  _fl_owner = this;
+}
 
 void** PtrQueueSet::allocate_buffer() {
   assert(_sz > 0, "Didn't set a buffer size.");
-  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
-  if (_buf_free_list != NULL) {
-    void** res = _buf_free_list;
-    _buf_free_list = (void**)_buf_free_list[0];
-    _buf_free_list_sz--;
+  MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
+  if (_fl_owner->_buf_free_list != NULL) {
+    void** res = _fl_owner->_buf_free_list;
+    _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0];
+    _fl_owner->_buf_free_list_sz--;
     // Just override the next pointer with NULL, just in case we scan this part
     // of the buffer.
     res[0] = NULL;
@@ -111,10 +113,10 @@
 
 void PtrQueueSet::deallocate_buffer(void** buf) {
   assert(_sz > 0, "Didn't set a buffer size.");
-  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
-  buf[0] = (void*)_buf_free_list;
-  _buf_free_list = buf;
-  _buf_free_list_sz++;
+  MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
+  buf[0] = (void*)_fl_owner->_buf_free_list;
+  _fl_owner->_buf_free_list = buf;
+  _fl_owner->_buf_free_list_sz++;
 }
 
 void PtrQueueSet::reduce_free_list() {
@@ -207,3 +209,58 @@
 void PtrQueueSet::set_process_completed_threshold(size_t sz) {
   _process_completed_threshold = sz;
 }
+
+// Merge lists of buffers. Notify waiting threads if the length of the list
+// exceeds threshold. The source queue is emptied as a result. The queues
+// must share the monitor.
+void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
+  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = src->_completed_buffers_head;
+    _completed_buffers_tail = src->_completed_buffers_tail;
+  } else {
+    assert(_completed_buffers_head != NULL, "Well formedness");
+    if (src->_completed_buffers_head != NULL) {
+      _completed_buffers_tail->next = src->_completed_buffers_head;
+      _completed_buffers_tail = src->_completed_buffers_tail;
+    }
+  }
+  _n_completed_buffers += src->_n_completed_buffers;
+
+  src->_n_completed_buffers = 0;
+  src->_completed_buffers_head = NULL;
+  src->_completed_buffers_tail = NULL;
+
+  assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
+         _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
+         "Sanity");
+
+  if (!_process_completed &&
+      _n_completed_buffers >= _process_completed_threshold) {
+    _process_completed = true;
+    if (_notify_when_complete)
+      _cbl_mon->notify_all();
+  }
+}
+
+// Merge free lists of the two queues. The free list of the source
+// queue is emptied as a result. The queues must share the same
+// mutex that guards free lists.
+void PtrQueueSet::merge_freelists(PtrQueueSet* src) {
+  assert(_fl_lock == src->_fl_lock, "Should share the same lock");
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  if (_buf_free_list != NULL) {
+    void **p = _buf_free_list;
+    while (*p != NULL) {
+      p = (void**)*p;
+    }
+    *p = src->_buf_free_list;
+  } else {
+    _buf_free_list = src->_buf_free_list;
+  }
+  _buf_free_list_sz += src->_buf_free_list_sz;
+  src->_buf_free_list = NULL;
+  src->_buf_free_list_sz = 0;
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -155,6 +155,9 @@
   Mutex* _fl_lock;
   void** _buf_free_list;
   size_t _buf_free_list_sz;
+  // Queue set can share a freelist. The _fl_owner variable
+  // specifies the owner. It is set to "this" by default.
+  PtrQueueSet* _fl_owner;
 
   // The size of all buffers in the set.
   size_t _sz;
@@ -188,10 +191,13 @@
   // Because of init-order concerns, we can't pass these as constructor
   // arguments.
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
-                  int max_completed_queue = 0) {
+                  int max_completed_queue = 0,
+                  PtrQueueSet *fl_owner = NULL) {
     _max_completed_queue = max_completed_queue;
     assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?");
-    _cbl_mon = cbl_mon; _fl_lock = fl_lock;
+    _cbl_mon = cbl_mon;
+    _fl_lock = fl_lock;
+    _fl_owner = (fl_owner != NULL) ? fl_owner : this;
   }
 
   // Return an empty oop array of size _sz (required to be non-zero).
@@ -228,4 +234,7 @@
   void reduce_free_list();
 
   size_t completed_buffers_num() { return _n_completed_buffers; }
+
+  void merge_bufferlists(PtrQueueSet* src);
+  void merge_freelists(PtrQueueSet* src);
 };
--- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -504,6 +504,7 @@
   // Make sure that the current and next tables agree.  (Another mechanism
   // takes care of deleting now-unused tables.)
   _cur = _next;
+  set_expanded(false);
 }
 
 void SparsePRT::expand() {
--- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -274,7 +274,7 @@
 
   // Clean up all tables on the expanded list.  Called single threaded.
   static void cleanup_all();
-  RSHashTable* next() const { return _next; }
+  RSHashTable* cur() const { return _cur; }
 
 
   void init_iterator(SparsePRTIter* sprt_iter);
@@ -300,7 +300,7 @@
   {}
 
   void init(const SparsePRT* sprt) {
-    RSHashTableIter::init(sprt->next());
+    RSHashTableIter::init(sprt->cur());
   }
   bool has_next(size_t& card_index) {
     return RSHashTableIter::has_next(card_index);
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -78,7 +78,7 @@
   }
 
   // Card marks are not precise. The current system can leave us with
-  // a mismash of precise marks and begining of object marks. This means
+  // a mismash of precise marks and beginning of object marks. This means
   // we test for missing precise marks first. If any are found, we don't
   // fail unless the object head is also unmarked.
   virtual void do_object(oop obj) {
@@ -258,7 +258,7 @@
     if (!start_array->object_starts_in_range(slice_start, slice_end)) {
       continue;
     }
-    // Update our begining addr
+    // Update our beginning addr
     HeapWord* first_object = start_array->object_start(slice_start);
     debug_only(oop* first_object_within_slice = (oop*) first_object;)
     if (first_object < slice_start) {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -127,7 +127,7 @@
   // Optimized for finding the first object that crosses into
   // a given block. The blocks contain the offset of the last
   // object in that block. Scroll backwards by one, and the first
-  // object hit should be at the begining of the block
+  // object hit should be at the beginning of the block
   HeapWord* object_start(HeapWord* addr) const {
     assert(_covered_region.contains(addr), "Must be in covered region");
     jbyte* block = block_for_addr(addr);
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -26,7 +26,7 @@
 // PrefetchQueue is a FIFO queue of variable length (currently 8).
 //
 // We need to examine the performance penalty of variable lengths.
-// We may also want to split this into cpu dependant bits.
+// We may also want to split this into cpu dependent bits.
 //
 
 const int PREFETCH_QUEUE_SIZE  = 8;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -125,6 +125,8 @@
     perm_gen->verify_object_start_array();
   }
 
+  heap->pre_full_gc_dump();
+
   // Filled in below to track the state of the young gen after the collection.
   bool eden_empty;
   bool survivors_empty;
@@ -363,6 +365,8 @@
     Universe::print_heap_after_gc();
   }
 
+  heap->post_full_gc_dump();
+
 #ifdef TRACESPINNING
   ParallelTaskTerminator::print_termination_counts();
 #endif
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1982,6 +1982,8 @@
     heap->record_gen_tops_before_GC();
   }
 
+  heap->pre_full_gc_dump();
+
   _print_phases = PrintGCDetails && PrintParallelOldGCPhaseTimes;
 
   // Make sure data structures are sane, make the heap parsable, and do other
@@ -2204,6 +2206,8 @@
     gc_task_manager()->print_task_time_stamps();
   }
 
+  heap->post_full_gc_dump();
+
 #ifdef TRACESPINNING
   ParallelTaskTerminator::print_termination_counts();
 #endif
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -74,7 +74,7 @@
   for (int i = 0; i < lgrp_spaces()->length(); i++) {
     LGRPSpace *ls = lgrp_spaces()->at(i);
     MutableSpace *s = ls->space();
-    if (s->top() < top()) { // For all spaces preceeding the one containing top()
+    if (s->top() < top()) { // For all spaces preceding the one containing top()
       if (s->free_in_words() > 0) {
         size_t area_touched_words = pointer_delta(s->end(), s->top());
         CollectedHeap::fill_with_object(s->top(), area_touched_words);
--- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -121,7 +121,7 @@
     // make the heap parsable (no need to retire TLABs)
     ch->ensure_parsability(false);
   }
-  HeapInspection::heap_inspection(_out);
+  HeapInspection::heap_inspection(_out, _need_prologue /* need_prologue */);
 }
 
 
--- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -112,13 +112,16 @@
  private:
   outputStream* _out;
   bool _full_gc;
+  bool _need_prologue;
  public:
-  VM_GC_HeapInspection(outputStream* out, bool request_full_gc) :
+  VM_GC_HeapInspection(outputStream* out, bool request_full_gc,
+                       bool need_prologue) :
     VM_GC_Operation(0 /* total collections,      dummy, ignored */,
                     0 /* total full collections, dummy, ignored */,
                     request_full_gc) {
     _out = out;
     _full_gc = request_full_gc;
+    _need_prologue = need_prologue;
   }
 
   ~VM_GC_HeapInspection() {}
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -294,3 +294,29 @@
     ThreadLocalAllocBuffer::resize_all_tlabs();
   }
 }
+
+void CollectedHeap::pre_full_gc_dump() {
+  if (HeapDumpBeforeFullGC) {
+    TraceTime tt("Heap Dump: ", PrintGCDetails, false, gclog_or_tty);
+    // We are doing a "major" collection and a heap dump before
+    // major collection has been requested.
+    HeapDumper::dump_heap();
+  }
+  if (PrintClassHistogramBeforeFullGC) {
+    TraceTime tt("Class Histogram: ", PrintGCDetails, true, gclog_or_tty);
+    VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */);
+    inspector.doit();
+  }
+}
+
+void CollectedHeap::post_full_gc_dump() {
+  if (HeapDumpAfterFullGC) {
+    TraceTime tt("Heap Dump", PrintGCDetails, false, gclog_or_tty);
+    HeapDumper::dump_heap();
+  }
+  if (PrintClassHistogramAfterFullGC) {
+    TraceTime tt("Class Histogram", PrintGCDetails, true, gclog_or_tty);
+    VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */);
+    inspector.doit();
+  }
+}
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -514,6 +514,10 @@
   // Perform any cleanup actions necessary before allowing a verification.
   virtual void prepare_for_verify() = 0;
 
+  // Generate any dumps preceding or following a full gc
+  void pre_full_gc_dump();
+  void post_full_gc_dump();
+
   virtual void print() const = 0;
   virtual void print_on(outputStream* st) const = 0;
 
--- a/hotspot/src/share/vm/includeDB_core	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/includeDB_core	Thu Mar 12 18:16:36 2009 -0700
@@ -176,7 +176,7 @@
 arguments.cpp                           oop.inline.hpp
 arguments.cpp                           os_<os_family>.inline.hpp
 arguments.cpp                           universe.inline.hpp
-arguments.cpp                           vm_version_<arch_model>.hpp
+arguments.cpp                           vm_version_<arch>.hpp
 
 arguments.hpp                           java.hpp
 arguments.hpp                           perfData.hpp
@@ -241,7 +241,7 @@
 assembler.hpp                           register_<arch>.hpp
 assembler.hpp                           relocInfo.hpp
 assembler.hpp                           top.hpp
-assembler.hpp                           vm_version_<arch_model>.hpp
+assembler.hpp                           vm_version_<arch>.hpp
 
 assembler.inline.hpp                    assembler.hpp
 assembler.inline.hpp                    codeBuffer.hpp
@@ -280,7 +280,7 @@
 
 atomic_<os_arch>.inline.hpp             atomic.hpp
 atomic_<os_arch>.inline.hpp             os.hpp
-atomic_<os_arch>.inline.hpp             vm_version_<arch_model>.hpp
+atomic_<os_arch>.inline.hpp             vm_version_<arch>.hpp
 
 // attachListener is jck optional, put cpp deps in includeDB_features
 
@@ -474,6 +474,7 @@
 cardTableModRefBS.cpp                   mutexLocker.hpp
 cardTableModRefBS.cpp                   sharedHeap.hpp
 cardTableModRefBS.cpp                   space.hpp
+cardTableModRefBS.cpp                   space.inline.hpp
 cardTableModRefBS.cpp                   universe.hpp
 cardTableModRefBS.cpp                   virtualspace.hpp
 
@@ -2094,6 +2095,7 @@
 interp_masm_<arch_model>.cpp            interpreterRuntime.hpp
 interp_masm_<arch_model>.cpp            interpreter.hpp
 interp_masm_<arch_model>.cpp            jvmtiExport.hpp
+interp_masm_<arch_model>.cpp            jvmtiRedefineClassesTrace.hpp
 interp_masm_<arch_model>.cpp            jvmtiThreadState.hpp
 interp_masm_<arch_model>.cpp            markOop.hpp
 interp_masm_<arch_model>.cpp            methodDataOop.hpp
@@ -2176,7 +2178,7 @@
 interpreterRuntime.cpp                  threadCritical.hpp
 interpreterRuntime.cpp                  universe.inline.hpp
 interpreterRuntime.cpp                  vmSymbols.hpp
-interpreterRuntime.cpp                  vm_version_<arch_model>.hpp
+interpreterRuntime.cpp                  vm_version_<arch>.hpp
 
 interpreterRuntime.hpp                  bytecode.hpp
 interpreterRuntime.hpp                  frame.inline.hpp
@@ -2279,7 +2281,7 @@
 java.cpp                                universe.hpp
 java.cpp                                vmError.hpp
 java.cpp                                vm_operations.hpp
-java.cpp                                vm_version_<arch_model>.hpp
+java.cpp                                vm_version_<arch>.hpp
 java.cpp                                vtune.hpp
 
 java.hpp                                os.hpp
@@ -3485,7 +3487,7 @@
 register_<arch>.cpp                     register_<arch>.hpp
 
 register_<arch>.hpp                     register.hpp
-register_<arch>.hpp                     vm_version_<arch_model>.hpp
+register_<arch>.hpp                     vm_version_<arch>.hpp
 
 registerMap.hpp                         globalDefinitions.hpp
 registerMap.hpp                         register_<arch>.hpp
@@ -3670,6 +3672,7 @@
 sharedRuntime.cpp                       interpreter.hpp
 sharedRuntime.cpp                       javaCalls.hpp
 sharedRuntime.cpp                       jvmtiExport.hpp
+sharedRuntime.cpp                       jvmtiRedefineClassesTrace.hpp
 sharedRuntime.cpp                       nativeInst_<arch>.hpp
 sharedRuntime.cpp                       nativeLookup.hpp
 sharedRuntime.cpp                       oop.inline.hpp
@@ -3699,6 +3702,7 @@
 sharedRuntime_<arch_model>.cpp          debugInfoRec.hpp
 sharedRuntime_<arch_model>.cpp          icBuffer.hpp
 sharedRuntime_<arch_model>.cpp          interpreter.hpp
+sharedRuntime_<arch_model>.cpp          jvmtiRedefineClassesTrace.hpp
 sharedRuntime_<arch_model>.cpp          sharedRuntime.hpp
 sharedRuntime_<arch_model>.cpp          vframeArray.hpp
 sharedRuntime_<arch_model>.cpp          vmreg_<arch>.inline.hpp
@@ -3835,7 +3839,7 @@
 statSampler.cpp                         statSampler.hpp
 statSampler.cpp                         systemDictionary.hpp
 statSampler.cpp                         vmSymbols.hpp
-statSampler.cpp                         vm_version_<arch_model>.hpp
+statSampler.cpp                         vm_version_<arch>.hpp
 
 statSampler.hpp                         perfData.hpp
 statSampler.hpp                         task.hpp
@@ -4579,22 +4583,22 @@
 vm_version.cpp                          arguments.hpp
 vm_version.cpp                          oop.inline.hpp
 vm_version.cpp                          universe.hpp
-vm_version.cpp                          vm_version_<arch_model>.hpp
+vm_version.cpp                          vm_version_<arch>.hpp
 
 vm_version.hpp                          allocation.hpp
 vm_version.hpp                          ostream.hpp
 
-vm_version_<arch_model>.cpp             assembler_<arch>.inline.hpp
-vm_version_<arch_model>.cpp             java.hpp
-vm_version_<arch_model>.cpp             os_<os_family>.inline.hpp
-vm_version_<arch_model>.cpp             resourceArea.hpp
-vm_version_<arch_model>.cpp             stubCodeGenerator.hpp
-vm_version_<arch_model>.cpp             vm_version_<arch_model>.hpp
-
-vm_version_<arch_model>.hpp             globals_extension.hpp
-vm_version_<arch_model>.hpp             vm_version.hpp
-
-vm_version_<os_arch>.cpp                vm_version_<arch_model>.hpp
+vm_version_<arch>.cpp                   assembler_<arch>.inline.hpp
+vm_version_<arch>.cpp                   java.hpp
+vm_version_<arch>.cpp                   os_<os_family>.inline.hpp
+vm_version_<arch>.cpp                   resourceArea.hpp
+vm_version_<arch>.cpp                   stubCodeGenerator.hpp
+vm_version_<arch>.cpp                   vm_version_<arch>.hpp
+
+vm_version_<arch>.hpp                   globals_extension.hpp
+vm_version_<arch>.hpp                   vm_version.hpp
+
+vm_version_<os_arch>.cpp                vm_version_<arch>.hpp
 
 vmreg.cpp                               assembler.hpp
 vmreg.cpp                               vmreg.hpp
--- a/hotspot/src/share/vm/includeDB_gc	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/includeDB_gc	Thu Mar 12 18:16:36 2009 -0700
@@ -26,10 +26,12 @@
 
 collectedHeap.cpp                       collectedHeap.hpp
 collectedHeap.cpp                       collectedHeap.inline.hpp
+collectedHeap.cpp                       heapDumper.hpp
 collectedHeap.cpp                       init.hpp
 collectedHeap.cpp                       oop.inline.hpp
 collectedHeap.cpp                       systemDictionary.hpp
 collectedHeap.cpp                       thread_<os_family>.inline.hpp
+collectedHeap.cpp                       vmGCOperations.hpp
 
 collectedHeap.hpp                       allocation.hpp
 collectedHeap.hpp                       barrierSet.hpp
--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file contains the platform-independant parts
+// This file contains the platform-independent parts
 // of the abstract interpreter and the abstract interpreter generator.
 
 // Organization of the interpreter(s). There exists two different interpreters in hotpot
--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -2642,7 +2642,7 @@
         // two interpreted frames). We need to save the current arguments in C heap so that
         // the deoptimized frame when it restarts can copy the arguments to its expression
         // stack and re-execute the call. We also have to notify deoptimization that this
-        // has occured and to pick the preerved args copy them to the deoptimized frame's
+        // has occurred and to pick the preserved args copy them to the deoptimized frame's
         // java expression stack. Yuck.
         //
         THREAD->popframe_preserve_args(in_ByteSize(METHOD->size_of_parameters() * wordSize),
--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file holds platform-independant bodies of inline functions for the C++ based interpreter
+// This file holds platform-independent bodies of inline functions for the C++ based interpreter
 
 #ifdef CC_INTERP
 
--- a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -24,7 +24,7 @@
 
 #ifdef CC_INTERP
 
-// This file contains the platform-independant parts
+// This file contains the platform-independent parts
 // of the c++ interpreter
 
 class CppInterpreter: public AbstractInterpreter {
--- a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file contains the platform-independant parts
+// This file contains the platform-independent parts
 // of the template interpreter generator.
 
 #ifdef CC_INTERP
--- a/hotspot/src/share/vm/interpreter/interpreter.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/interpreter.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file contains the platform-independant parts
+// This file contains the platform-independent parts
 // of the interpreter and the interpreter generator.
 
 //------------------------------------------------------------------------------------------------------------------------
--- a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file contains the platform-independant parts
+// This file contains the platform-independent parts
 // of the interpreter generator.
 
 
--- a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file contains the platform-independant parts
+// This file contains the platform-independent parts
 // of the template interpreter and the template interpreter generator.
 
 #ifndef CC_INTERP
--- a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file contains the platform-independant parts
+// This file contains the platform-independent parts
 // of the template interpreter generator.
 
 #ifndef CC_INTERP
--- a/hotspot/src/share/vm/libadt/dict.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/libadt/dict.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -306,7 +306,7 @@
 // Convert string to hash key.  This algorithm implements a universal hash
 // function with the multipliers frozen (ok, so it's not universal).  The
 // multipliers (and allowable characters) are all odd, so the resultant sum
-// is odd - guarenteed not divisible by any power of two, so the hash tables
+// is odd - guaranteed not divisible by any power of two, so the hash tables
 // can be any power of two with good results.  Also, I choose multipliers
 // that have only 2 bits set (the low is always set to be odd) so
 // multiplication requires only shifts and adds.  Characters are required to
@@ -326,7 +326,7 @@
 }
 
 //------------------------------hashptr--------------------------------------
-// Slimey cheap hash function; no guarenteed performance.  Better than the
+// Slimey cheap hash function; no guaranteed performance.  Better than the
 // default for pointers, especially on MS-DOS machines.
 int hashptr(const void *key) {
 #ifdef __TURBOC__
@@ -336,7 +336,7 @@
 #endif
 }
 
-// Slimey cheap hash function; no guarenteed performance.
+// Slimey cheap hash function; no guaranteed performance.
 int hashkey(const void *key) {
   return (intptr_t)key;
 }
--- a/hotspot/src/share/vm/libadt/dict.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/libadt/dict.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -86,10 +86,10 @@
 
 // Hashing functions
 int hashstr(const void *s);        // Nice string hash
-// Slimey cheap hash function; no guarenteed performance.  Better than the
+// Slimey cheap hash function; no guaranteed performance.  Better than the
 // default for pointers, especially on MS-DOS machines.
 int hashptr(const void *key);
-// Slimey cheap hash function; no guarenteed performance.
+// Slimey cheap hash function; no guaranteed performance.
 int hashkey(const void *key);
 
 // Key comparators
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -217,15 +217,28 @@
       (HeapWord*) align_size_up((uintptr_t)new_end, _page_size);
     assert(new_end_aligned >= (HeapWord*) new_end,
            "align up, but less");
+    // Check the other regions (excludes "ind") to ensure that
+    // the new_end_aligned does not intrude onto the committed
+    // space of another region.
     int ri = 0;
     for (ri = 0; ri < _cur_covered_regions; ri++) {
       if (ri != ind) {
         if (_committed[ri].contains(new_end_aligned)) {
-          assert((new_end_aligned >= _committed[ri].start()) &&
-                 (_committed[ri].start() > _committed[ind].start()),
+          // The prior check included in the assert
+          // (new_end_aligned >= _committed[ri].start())
+          // is redundant with the "contains" test.
+          // Any region containing the new end
+          // should start at or beyond the region found (ind)
+          // for the new end (committed regions are not expected to
+          // be proper subsets of other committed regions).
+          assert(_committed[ri].start() >= _committed[ind].start(),
                  "New end of committed region is inconsistent");
           new_end_aligned = _committed[ri].start();
-          assert(new_end_aligned > _committed[ind].start(),
+          // new_end_aligned can be equal to the start of its
+          // committed region (i.e., of "ind") if a second
+          // region following "ind" also start at the same location
+          // as "ind".
+          assert(new_end_aligned >= _committed[ind].start(),
             "New end of committed region is before start");
           debug_only(collided = true;)
           // Should only collide with 1 region
@@ -343,18 +356,62 @@
   inline_write_ref_field(field, newVal);
 }
 
+/*
+   Claimed and deferred bits are used together in G1 during the evacuation
+   pause. These bits can have the following state transitions:
+   1. The claimed bit can be put over any other card state. Except that
+      the "dirty -> dirty and claimed" transition is checked for in
+      G1 code and is not used.
+   2. Deferred bit can be set only if the previous state of the card
+      was either clean or claimed. mark_card_deferred() is wait-free.
+      We do not care if the operation is be successful because if
+      it does not it will only result in duplicate entry in the update
+      buffer because of the "cache-miss". So it's not worth spinning.
+ */
+
 
 bool CardTableModRefBS::claim_card(size_t card_index) {
   jbyte val = _byte_map[card_index];
-  if (val != claimed_card_val()) {
-    jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val);
-    if (res == val)
+  assert(val != dirty_card_val(), "Shouldn't claim a dirty card");
+  while (val == clean_card_val() ||
+         (val & (clean_card_mask_val() | claimed_card_val())) != claimed_card_val()) {
+    jbyte new_val = val;
+    if (val == clean_card_val()) {
+      new_val = (jbyte)claimed_card_val();
+    } else {
+      new_val = val | (jbyte)claimed_card_val();
+    }
+    jbyte res = Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
+    if (res == val) {
       return true;
-    else return false;
+    }
+    val = res;
   }
   return false;
 }
 
+bool CardTableModRefBS::mark_card_deferred(size_t card_index) {
+  jbyte val = _byte_map[card_index];
+  // It's already processed
+  if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) {
+    return false;
+  }
+  // Cached bit can be installed either on a clean card or on a claimed card.
+  jbyte new_val = val;
+  if (val == clean_card_val()) {
+    new_val = (jbyte)deferred_card_val();
+  } else {
+    if (val & claimed_card_val()) {
+      new_val = val | (jbyte)deferred_card_val();
+    }
+  }
+  if (new_val != val) {
+    Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
+  }
+  return true;
+}
+
+
 void CardTableModRefBS::non_clean_card_iterate(Space* sp,
                                                MemRegion mr,
                                                DirtyCardToOopClosure* dcto_cl,
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -52,11 +52,15 @@
 
   enum CardValues {
     clean_card                  = -1,
+    // The mask contains zeros in places for all other values.
+    clean_card_mask             = clean_card - 31,
+
     dirty_card                  =  0,
     precleaned_card             =  1,
-    claimed_card                =  3,
-    last_card                   =  4,
-    CT_MR_BS_last_reserved      = 10
+    claimed_card                =  2,
+    deferred_card               =  4,
+    last_card                   =  8,
+    CT_MR_BS_last_reserved      = 16
   };
 
   // dirty and precleaned are equivalent wrt younger_refs_iter.
@@ -254,9 +258,11 @@
   };
 
   static int clean_card_val()      { return clean_card; }
+  static int clean_card_mask_val() { return clean_card_mask; }
   static int dirty_card_val()      { return dirty_card; }
   static int claimed_card_val()    { return claimed_card; }
   static int precleaned_card_val() { return precleaned_card; }
+  static int deferred_card_val()   { return deferred_card; }
 
   // For RTTI simulation.
   bool is_a(BarrierSet::Name bsn) {
@@ -329,7 +335,8 @@
   }
 
   bool is_card_claimed(size_t card_index) {
-    return _byte_map[card_index] == claimed_card_val();
+    jbyte val = _byte_map[card_index];
+    return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val();
   }
 
   bool claim_card(size_t card_index);
@@ -338,6 +345,13 @@
     return _byte_map[card_index] == clean_card_val();
   }
 
+  bool is_card_deferred(size_t card_index) {
+    jbyte val = _byte_map[card_index];
+    return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val();
+  }
+
+  bool mark_card_deferred(size_t card_index);
+
   // Card marking array base (adjusted for heap low boundary)
   // This would be the 0th element of _byte_map, if the heap started at 0x0.
   // But since the heap starts at some higher address, this points to somewhere
@@ -434,6 +448,10 @@
     return byte_for(p) - _byte_map;
   }
 
+  const jbyte* byte_for_index(const size_t card_index) const {
+    return _byte_map + card_index;
+  }
+
   void verify();
   void verify_guard();
 
--- a/hotspot/src/share/vm/memory/filemap.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/memory/filemap.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -35,14 +35,14 @@
 extern address JVM_FunctionAtStart();
 extern address JVM_FunctionAtEnd();
 
-// Complain and stop. All error conditions occuring during the writing of
+// Complain and stop. All error conditions occurring during the writing of
 // an archive file should stop the process.  Unrecoverable errors during
 // the reading of the archive file should stop the process.
 
 static void fail(const char *msg, va_list ap) {
   // This occurs very early during initialization: tty is not initialized.
   jio_fprintf(defaultStream::error_stream(),
-              "An error has occured while processing the"
+              "An error has occurred while processing the"
               " shared archive file.\n");
   jio_vfprintf(defaultStream::error_stream(), msg, ap);
   jio_fprintf(defaultStream::error_stream(), "\n");
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -456,6 +456,9 @@
     int max_level_collected = starting_level;
     for (int i = starting_level; i <= max_level; i++) {
       if (_gens[i]->should_collect(full, size, is_tlab)) {
+        if (i == n_gens() - 1) {  // a major collection is to happen
+          pre_full_gc_dump();    // do any pre full gc dumps
+        }
         // Timer for individual generations. Last argument is false: no CR
         TraceTime t1(_gens[i]->short_name(), PrintGCDetails, false, gclog_or_tty);
         TraceCollectorStats tcs(_gens[i]->counters());
@@ -573,6 +576,10 @@
     // a whole heap collection.
     complete = complete || (max_level_collected == n_gens() - 1);
 
+    if (complete) { // We did a "major" collection
+      post_full_gc_dump();   // do any post full gc dumps
+    }
+
     if (PrintGCDetails) {
       print_heap_change(gch_prev_used);
 
--- a/hotspot/src/share/vm/memory/heapInspection.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/memory/heapInspection.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -233,7 +233,7 @@
   size_t missed_count() { return _missed_count; }
 };
 
-void HeapInspection::heap_inspection(outputStream* st) {
+void HeapInspection::heap_inspection(outputStream* st, bool need_prologue) {
   ResourceMark rm;
   HeapWord* ref;
 
@@ -244,7 +244,9 @@
     case CollectedHeap::GenCollectedHeap: {
       is_shared_heap = true;
       SharedHeap* sh = (SharedHeap*)heap;
-      sh->gc_prologue(false /* !full */); // get any necessary locks, etc.
+      if (need_prologue) {
+        sh->gc_prologue(false /* !full */); // get any necessary locks, etc.
+      }
       ref = sh->perm_gen()->used_region().start();
       break;
     }
@@ -290,7 +292,7 @@
   }
   st->flush();
 
-  if (is_shared_heap) {
+  if (need_prologue && is_shared_heap) {
     SharedHeap* sh = (SharedHeap*)heap;
     sh->gc_epilogue(false /* !full */); // release all acquired locks, etc.
   }
--- a/hotspot/src/share/vm/memory/heapInspection.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/memory/heapInspection.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -127,6 +127,6 @@
 
 class HeapInspection : public AllStatic {
  public:
-  static void heap_inspection(outputStream* st) KERNEL_RETURN;
+  static void heap_inspection(outputStream* st, bool need_prologue) KERNEL_RETURN;
   static void find_instances_at_safepoint(klassOop k, GrowableArray<oop>* result) KERNEL_RETURN;
 };
--- a/hotspot/src/share/vm/memory/permGen.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/memory/permGen.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -36,7 +36,7 @@
   friend class VMStructs;
  protected:
   size_t _capacity_expansion_limit;  // maximum expansion allowed without a
-                                     // full gc occuring
+                                     // full gc occurring
 
   HeapWord* mem_allocate_in_gen(size_t size, Generation* gen);
 
--- a/hotspot/src/share/vm/oops/generateOopMap.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/oops/generateOopMap.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -2003,7 +2003,7 @@
 //  ============ Main Entry Point ===========
 //
 GenerateOopMap::GenerateOopMap(methodHandle method) {
-  // We have to initialize all variables here, that can be queried direcly
+  // We have to initialize all variables here, that can be queried directly
   _method = method;
   _max_locals=0;
   _init_vars = NULL;
--- a/hotspot/src/share/vm/oops/generateOopMap.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/oops/generateOopMap.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -292,7 +292,7 @@
   int          _max_stack;                  // Cached value of max. stack depth
   int          _max_monitors;               // Cached value of max. monitor stack depth
   int          _has_exceptions;             // True, if exceptions exist for method
-  bool         _got_error;                  // True, if an error occured during interpretation.
+  bool         _got_error;                  // True, if an error occurred during interpretation.
   Handle       _exception;                  // Exception if got_error is true.
   bool         _did_rewriting;              // was bytecodes rewritten
   bool         _did_relocation;             // was relocation neccessary
@@ -422,7 +422,7 @@
   void  add_to_ref_init_set                 (int localNo);
 
   // Conflicts rewrite logic
-  bool      _conflict;                      // True, if a conflict occured during interpretation
+  bool      _conflict;                      // True, if a conflict occurred during interpretation
   int       _nof_refval_conflicts;          // No. of conflicts that require rewrites
   int *     _new_var_map;
 
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1917,7 +1917,7 @@
                        / itableOffsetEntry::size();
 
   for (int cnt = 0 ; ; cnt ++, ioe ++) {
-    // If the interface isn't implemented by the reciever class,
+    // If the interface isn't implemented by the receiver class,
     // the VM should throw IncompatibleClassChangeError.
     if (cnt >= nof_interfaces) {
       THROW_OOP_0(vmSymbols::java_lang_IncompatibleClassChangeError());
--- a/hotspot/src/share/vm/oops/klass.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/oops/klass.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -71,7 +71,7 @@
   return r;                   // Return the 1 concrete class
 }
 
-// Find LCA in class heirarchy
+// Find LCA in class hierarchy
 Klass *Klass::LCA( Klass *k2 ) {
   Klass *k1 = this;
   while( 1 ) {
--- a/hotspot/src/share/vm/oops/klass.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/oops/klass.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -471,7 +471,7 @@
   }
   bool search_secondary_supers(klassOop k) const;
 
-  // Find LCA in class heirarchy
+  // Find LCA in class hierarchy
   Klass *LCA( Klass *k );
 
   // Check whether reflection/jni/jvm code is allowed to instantiate this class;
--- a/hotspot/src/share/vm/oops/klassVtable.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/oops/klassVtable.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -992,6 +992,10 @@
     methodOop new_method = new_methods[j];
     itableMethodEntry* ime = method_entry(0);
 
+    // The itable can describe more than one interface and the same
+    // method signature can be specified by more than one interface.
+    // This means we have to do an exhaustive search to find all the
+    // old_method references.
     for (int i = 0; i < _size_method_table; i++) {
       if (ime->method() == old_method) {
         ime->initialize(new_method);
@@ -1008,7 +1012,6 @@
             new_method->name()->as_C_string(),
             new_method->signature()->as_C_string()));
         }
-        break;
       }
       ime++;
     }
--- a/hotspot/src/share/vm/oops/methodOop.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/oops/methodOop.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -296,7 +296,7 @@
   void set_compiled_invocation_count(int count)  { _compiled_invocation_count = count; }
 #endif // not PRODUCT
 
-  // Clear (non-shared space) pointers which could not be relevent
+  // Clear (non-shared space) pointers which could not be relevant
   // if this (shared) method were mapped into another JVM.
   void remove_unshareable_info();
 
--- a/hotspot/src/share/vm/opto/block.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/block.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -181,7 +181,7 @@
 }
 
 //------------------------------has_uncommon_code------------------------------
-// Return true if the block's code implies that it is not likely to be
+// Return true if the block's code implies that it is likely to be
 // executed infrequently.  Check to see if the block ends in a Halt or
 // a low probability call.
 bool Block::has_uncommon_code() const {
@@ -909,6 +909,10 @@
               !(n->jvms() != NULL && n->jvms()->is_monitor_use(k)) ) {
             assert( b->find_node(def) < j, "uses must follow definitions" );
           }
+          if( def->is_SafePointScalarObject() ) {
+            assert(_bbs[def->_idx] == b, "SafePointScalarObject Node should be at the same block as its SafePoint node");
+            assert(_bbs[def->_idx] == _bbs[def->in(0)->_idx], "SafePointScalarObject Node should be at the same block as its control edge");
+          }
         }
       }
     }
@@ -1307,7 +1311,7 @@
       }
     } else if (e->state() == CFGEdge::open) {
       // Append traces, even without a fall-thru connection.
-      // But leave root entry at the begining of the block list.
+      // But leave root entry at the beginning of the block list.
       if (targ_trace != trace(_cfg._broot)) {
         e->set_state(CFGEdge::connected);
         src_trace->append(targ_trace);
@@ -1430,7 +1434,7 @@
     }
 
     // Backbranch to the top of a trace
-    // Scroll foward through the trace from the targ_block. If we find
+    // Scroll forward through the trace from the targ_block. If we find
     // a loop head before another loop top, use the the loop head alignment.
     for (Block *b = targ_block; b != NULL; b = next(b)) {
       if (b->has_loop_alignment()) {
--- a/hotspot/src/share/vm/opto/block.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/block.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -347,6 +347,8 @@
   // Helper function to insert a node into a block
   void schedule_node_into_block( Node *n, Block *b );
 
+  void replace_block_proj_ctrl( Node *n );
+
   // Set the basic block for pinned Nodes
   void schedule_pinned_nodes( VectorSet &visited );
 
@@ -607,7 +609,7 @@
   Block * next(Block *b) const { return _next_list[b->_pre_order]; }
   void set_next(Block *b, Block *n) const { _next_list[b->_pre_order] = n; }
 
-  // Return the block that preceeds "b" in the trace.
+  // Return the block that precedes "b" in the trace.
   Block * prev(Block *b) const { return _prev_list[b->_pre_order]; }
   void set_prev(Block *b, Block *p) const { _prev_list[b->_pre_order] = p; }
 
--- a/hotspot/src/share/vm/opto/buildOopMap.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/buildOopMap.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -55,7 +55,7 @@
 // breadth-first approach but it was worse (showed O(n^2) in the
 // pick-next-block code).
 //
-// The relevent data is kept in a struct of arrays (it could just as well be
+// The relevant data is kept in a struct of arrays (it could just as well be
 // an array of structs, but the struct-of-arrays is generally a little more
 // efficient).  The arrays are indexed by register number (including
 // stack-slots as registers) and so is bounded by 200 to 300 elements in
--- a/hotspot/src/share/vm/opto/callnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -975,6 +975,7 @@
 }
 
 bool SafePointScalarObjectNode::pinned() const { return true; }
+bool SafePointScalarObjectNode::depends_only_on_test() const { return false; }
 
 uint SafePointScalarObjectNode::ideal_reg() const {
   return 0; // No matching to machine instruction
--- a/hotspot/src/share/vm/opto/callnode.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/callnode.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -437,6 +437,10 @@
   // of the SafePoint node for which it was generated.
   virtual bool pinned() const; // { return true; }
 
+  // SafePointScalarObject depends on the SafePoint node
+  // for which it was generated.
+  virtual bool depends_only_on_test() const; // { return false; }
+
   virtual uint size_of() const { return sizeof(*this); }
 
   // Assumes that "this" is an argument to a safepoint node "s", and that
--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1350,7 +1350,7 @@
   }
 
   // Register the new node but do not transform it.  Cannot transform until the
-  // entire Region/Phi conglerate has been hacked as a single huge transform.
+  // entire Region/Phi conglomerate has been hacked as a single huge transform.
   igvn->register_new_node_with_optimizer( newn );
   // Now I can point to the new node.
   n->add_req(newn);
@@ -1381,7 +1381,7 @@
   Node *val = phi->in(i);       // Constant to split for
   uint hit = 0;                 // Number of times it occurs
 
-  for( ; i < phi->req(); i++ ){ // Count occurances of constant
+  for( ; i < phi->req(); i++ ){ // Count occurrences of constant
     Node *n = phi->in(i);
     if( !n ) return NULL;
     if( phase->type(n) == Type::TOP ) return NULL;
@@ -1423,7 +1423,7 @@
 
 //=============================================================================
 //------------------------------simple_data_loop_check-------------------------
-//  Try to determing if the phi node in a simple safe/unsafe data loop.
+//  Try to determining if the phi node in a simple safe/unsafe data loop.
 //  Returns:
 // enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop };
 // Safe       - safe case when the phi and it's inputs reference only safe data
@@ -1687,7 +1687,7 @@
               progress = phase->C->top();
               break;
             }
-            // If tranformed to a MergeMem, get the desired slice
+            // If transformed to a MergeMem, get the desired slice
             // Otherwise the returned node represents memory for every slice
             Node *new_mem = (m->is_MergeMem()) ?
                              m->as_MergeMem()->memory_at(alias_idx) : m;
@@ -1962,7 +1962,7 @@
         f[CatchProjNode::fall_through_index] = Type::TOP;
       } else if( call->req() > TypeFunc::Parms ) {
         const Type *arg0 = phase->type( call->in(TypeFunc::Parms) );
-        // Check for null reciever to virtual or interface calls
+        // Check for null receiver to virtual or interface calls
         if( call->is_CallDynamicJava() &&
             arg0->higher_equal(TypePtr::NULL_PTR) ) {
           f[CatchProjNode::fall_through_index] = Type::TOP;
@@ -1995,7 +1995,7 @@
   // also remove any exception table entry.  Thus we must know the call
   // feeding the Catch will not really throw an exception.  This is ok for
   // the main fall-thru control (happens when we know a call can never throw
-  // an exception) or for "rethrow", because a further optimnization will
+  // an exception) or for "rethrow", because a further optimization will
   // yank the rethrow (happens when we inline a function that can throw an
   // exception and the caller has no handler).  Not legal, e.g., for passing
   // a NULL receiver to a v-call, or passing bad types to a slow-check-cast.
--- a/hotspot/src/share/vm/opto/chaitin.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/chaitin.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1246,7 +1246,7 @@
 
       // If the live range is not bound, then we actually had some choices
       // to make.  In this case, the mask has more bits in it than the colors
-      // choosen.  Restrict the mask to just what was picked.
+      // chosen.  Restrict the mask to just what was picked.
       if( lrg->num_regs() == 1 ) { // Size 1 live range
         lrg->Clear();           // Clear the mask
         lrg->Insert(reg);       // Set regmask to match selected reg
--- a/hotspot/src/share/vm/opto/chaitin.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/chaitin.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -327,7 +327,7 @@
   // True if lidx is used before any real register is def'd in the block
   bool prompt_use( Block *b, uint lidx );
   Node *get_spillcopy_wide( Node *def, Node *use, uint uidx );
-  // Insert the spill at chosen location.  Skip over any interveneing Proj's or
+  // Insert the spill at chosen location.  Skip over any intervening Proj's or
   // Phis.  Skip over a CatchNode and projs, inserting in the fall-through block
   // instead.  Update high-pressure indices.  Create a new live range.
   void insert_proj( Block *b, uint i, Node *spill, uint maxlrg );
@@ -431,7 +431,7 @@
   void Simplify();
 
   // Select colors by re-inserting edges into the IFG.
-  // Return TRUE if any spills occured.
+  // Return TRUE if any spills occurred.
   uint Select( );
   // Helper function for select which allows biased coloring
   OptoReg::Name choose_color( LRG &lrg, int chunk );
--- a/hotspot/src/share/vm/opto/classes.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/classes.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -129,11 +129,13 @@
 macro(LShiftI)
 macro(LShiftL)
 macro(LoadB)
+macro(LoadUB)
 macro(LoadUS)
 macro(LoadD)
 macro(LoadD_unaligned)
 macro(LoadF)
 macro(LoadI)
+macro(LoadUI2L)
 macro(LoadKlass)
 macro(LoadNKlass)
 macro(LoadL)
--- a/hotspot/src/share/vm/opto/coalesce.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/coalesce.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -123,7 +123,7 @@
 }
 
 //------------------------------clone_projs------------------------------------
-// After cloning some rematierialized instruction, clone any MachProj's that
+// After cloning some rematerialized instruction, clone any MachProj's that
 // follow it.  Example: Intel zero is XOR, kills flags.  Sparc FP constants
 // use G3 as an address temp.
 int PhaseChaitin::clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg ) {
@@ -694,8 +694,8 @@
           } // End of if not infinite-stack neighbor
         } // End of if actually inserted
       } // End of if live range overlaps
-    } // End of else collect intereferences for 1 node
-  } // End of while forever, scan back for intereferences
+    } // End of else collect interferences for 1 node
+  } // End of while forever, scan back for interferences
   return reg_degree;
 }
 
@@ -786,7 +786,7 @@
   if( rm_size == 0 ) return false;
 
   // Another early bail-out test is when we are double-coalescing and the
-  // 2 copies are seperated by some control flow.
+  // 2 copies are separated by some control flow.
   if( dst_copy != src_copy ) {
     Block *src_b = _phc._cfg._bbs[src_copy->_idx];
     Block *b2 = b;
--- a/hotspot/src/share/vm/opto/compile.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/compile.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -337,7 +337,7 @@
     tty->print_cr("*********************************************************");
   }
   if (env()->break_at_compile()) {
-    // Open the debugger when compiing this method.
+    // Open the debugger when compiling this method.
     tty->print("### Breaking when compiling: ");
     method()->print_short_name();
     tty->cr();
@@ -1191,8 +1191,8 @@
     default: ShouldNotReachHere();
     }
     break;
-  case 2:                       // No collasping at level 2; keep all splits
-  case 3:                       // No collasping at level 3; keep all splits
+  case 2:                       // No collapsing at level 2; keep all splits
+  case 3:                       // No collapsing at level 3; keep all splits
     break;
   default:
     Unimplemented();
@@ -2005,8 +2005,10 @@
   case Op_StoreP:
   case Op_StoreN:
   case Op_LoadB:
+  case Op_LoadUB:
   case Op_LoadUS:
   case Op_LoadI:
+  case Op_LoadUI2L:
   case Op_LoadKlass:
   case Op_LoadNKlass:
   case Op_LoadL:
@@ -2102,7 +2104,7 @@
         // [base_reg + offset]
         // NullCheck base_reg
         //
-        // Pin the new DecodeN node to non-null path on these patforms (Sparc)
+        // Pin the new DecodeN node to non-null path on these platform (Sparc)
         // to keep the information to which NULL check the new DecodeN node
         // corresponds to use it as value in implicit_null_check().
         //
--- a/hotspot/src/share/vm/opto/connode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/connode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -71,7 +71,7 @@
 to figure out which test post-dominates.  The real problem is that it doesn't
 matter which one you pick.  After you pick up, the dominating-test elider in
 IGVN can remove the test and allow you to hoist up to the dominating test on
-the choosen oop bypassing the test on the not-choosen oop.  Seen in testing.
+the chosen oop bypassing the test on the not-chosen oop.  Seen in testing.
 Oops.
 
 (3) Leave the CastPP's in.  This makes the graph more accurate in some sense;
--- a/hotspot/src/share/vm/opto/divnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/divnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -35,7 +35,7 @@
 // by constant into a multiply/shift/add series. Return false if calculations
 // fail.
 //
-// Borrowed almost verbatum from Hacker's Delight by Henry S. Warren, Jr. with
+// Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with
 // minor type name and parameter changes.
 static bool magic_int_divide_constants(jint d, jint &M, jint &s) {
   int32_t p;
@@ -202,7 +202,7 @@
 // by constant into a multiply/shift/add series. Return false if calculations
 // fail.
 //
-// Borrowed almost verbatum from Hacker's Delight by Henry S. Warren, Jr. with
+// Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with
 // minor type name and parameter changes.  Adjusted to 64 bit word width.
 static bool magic_long_divide_constants(jlong d, jlong &M, jint &s) {
   int64_t p;
@@ -1069,7 +1069,7 @@
 
   int log2_con = -1;
 
-  // If this is a power of two, they maybe we can mask it
+  // If this is a power of two, then maybe we can mask it
   if( is_power_of_2_long(pos_con) ) {
     log2_con = log2_long(pos_con);
 
--- a/hotspot/src/share/vm/opto/domgraph.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/domgraph.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -183,7 +183,7 @@
       if (pre_order == 1)
         t->_parent = NULL;          // first block doesn't have parent
       else {
-        // Save parent (currernt top block on stack) in DFS
+        // Save parent (current top block on stack) in DFS
         t->_parent = &_tarjan[_stack_top->block->_pre_order];
       }
       // Now put this block on stack
--- a/hotspot/src/share/vm/opto/escape.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/escape.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -515,7 +515,7 @@
   // cause the failure in add_offset() with narrow oops since TypeOopPtr()
   // constructor verifies correctness of the offset.
   //
-  // It could happend on subclass's branch (from the type profiling
+  // It could happened on subclass's branch (from the type profiling
   // inlining) which was not eliminated during parsing since the exactness
   // of the allocation type was not propagated to the subclass type check.
   //
@@ -703,7 +703,7 @@
   while (prev != result) {
     prev = result;
     if (result == start_mem)
-      break;  // hit one of our sentinals
+      break;  // hit one of our sentinels
     if (result->is_Mem()) {
       const Type *at = phase->type(result->in(MemNode::Address));
       if (at != Type::TOP) {
@@ -720,7 +720,7 @@
     if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) {
       Node *proj_in = result->in(0);
       if (proj_in->is_Allocate() && proj_in->_idx == (uint)tinst->instance_id()) {
-        break;  // hit one of our sentinals
+        break;  // hit one of our sentinels
       } else if (proj_in->is_Call()) {
         CallNode *call = proj_in->as_Call();
         if (!call->may_modify(tinst, phase)) {
@@ -756,6 +756,16 @@
       } else {
         break;
       }
+    } else if (result->Opcode() == Op_SCMemProj) {
+      assert(result->in(0)->is_LoadStore(), "sanity");
+      const Type *at = phase->type(result->in(0)->in(MemNode::Address));
+      if (at != Type::TOP) {
+        assert (at->isa_ptr() != NULL, "pointer type required.");
+        int idx = C->get_alias_index(at->is_ptr());
+        assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node access its field");
+        break;
+      }
+      result = result->in(0)->in(MemNode::Memory);
     }
   }
   if (result->is_Phi()) {
@@ -794,7 +804,7 @@
 //  Phase 2:  Process MemNode's from memnode_worklist. compute new address type and
 //            search the Memory chain for a store with the appropriate type
 //            address type.  If a Phi is found, create a new version with
-//            the approriate memory slices from each of the Phi inputs.
+//            the appropriate memory slices from each of the Phi inputs.
 //            For stores, process the users as follows:
 //               MemNode:  push on memnode_worklist
 //               MergeMem: push on mergemem_worklist
@@ -1548,7 +1558,7 @@
       has_non_escaping_obj = true; // Non GlobalEscape
     Node* n = ptn->_node;
     if (n->is_Allocate() && ptn->_scalar_replaceable ) {
-      // Push scalar replaceable alocations on alloc_worklist
+      // Push scalar replaceable allocations on alloc_worklist
       // for processing in split_unique_types().
       alloc_worklist.append(n);
     }
--- a/hotspot/src/share/vm/opto/gcm.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/gcm.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -57,6 +57,37 @@
   }
 }
 
+//----------------------------replace_block_proj_ctrl-------------------------
+// Nodes that have is_block_proj() nodes as their control need to use
+// the appropriate Region for their actual block as their control since
+// the projection will be in a predecessor block.
+void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
+  const Node *in0 = n->in(0);
+  assert(in0 != NULL, "Only control-dependent");
+  const Node *p = in0->is_block_proj();
+  if (p != NULL && p != n) {    // Control from a block projection?
+    assert(!n->pinned() || n->is_SafePointScalarObject(), "only SafePointScalarObject pinned node is expected here");
+    // Find trailing Region
+    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
+    uint j = 0;
+    if (pb->_num_succs != 1) {  // More then 1 successor?
+      // Search for successor
+      uint max = pb->_nodes.size();
+      assert( max > 1, "" );
+      uint start = max - pb->_num_succs;
+      // Find which output path belongs to projection
+      for (j = start; j < max; j++) {
+        if( pb->_nodes[j] == in0 )
+          break;
+      }
+      assert( j < max, "must find" );
+      // Change control to match head of successor basic block
+      j -= start;
+    }
+    n->set_req(0, pb->_succs[j]->head());
+  }
+}
+
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
@@ -68,8 +99,10 @@
     Node *n = spstack.pop();
     if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
       if( n->pinned() && !_bbs.lookup(n->_idx) ) {  // Pinned?  Nail it down!
+        assert( n->in(0), "pinned Node must have Control" );
+        // Before setting block replace block_proj control edge
+        replace_block_proj_ctrl(n);
         Node *input = n->in(0);
-        assert( input, "pinned Node must have Control" );
         while( !input->is_block_start() )
           input = input->in(0);
         Block *b = _bbs[input->_idx];  // Basic block of controlling input
@@ -158,34 +191,12 @@
       uint  i = nstack_top_i;
 
       if (i == 0) {
-        // Special control input processing.
-        // While I am here, go ahead and look for Nodes which are taking control
-        // from a is_block_proj Node.  After I inserted RegionNodes to make proper
-        // blocks, the control at a is_block_proj more properly comes from the
-        // Region being controlled by the block_proj Node.
+        // Fixup some control.  Constants without control get attached
+        // to root and nodes that use is_block_proj() nodes should be attached
+        // to the region that starts their block.
         const Node *in0 = n->in(0);
         if (in0 != NULL) {              // Control-dependent?
-          const Node *p = in0->is_block_proj();
-          if (p != NULL && p != n) {    // Control from a block projection?
-            // Find trailing Region
-            Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
-            uint j = 0;
-            if (pb->_num_succs != 1) {  // More then 1 successor?
-              // Search for successor
-              uint max = pb->_nodes.size();
-              assert( max > 1, "" );
-              uint start = max - pb->_num_succs;
-              // Find which output path belongs to projection
-              for (j = start; j < max; j++) {
-                if( pb->_nodes[j] == in0 )
-                  break;
-              }
-              assert( j < max, "must find" );
-              // Change control to match head of successor basic block
-              j -= start;
-            }
-            n->set_req(0, pb->_succs[j]->head());
-          }
+          replace_block_proj_ctrl(n);
         } else {               // n->in(0) == NULL
           if (n->req() == 1) { // This guy is a constant with NO inputs?
             n->set_req(0, _root);
@@ -226,6 +237,8 @@
         if (!n->pinned()) {
           // Set earliest legal block.
           _bbs.map(n->_idx, find_deepest_input(n, _bbs));
+        } else {
+          assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
         }
 
         if (nstack.is_empty()) {
@@ -593,7 +606,7 @@
           if (pred_block != early) {
             // If any predecessor of the Phi matches the load's "early block",
             // we do not need a precedence edge between the Phi and 'load'
-            // since the load will be forced into a block preceeding the Phi.
+            // since the load will be forced into a block preceding the Phi.
             pred_block->set_raise_LCA_mark(load_index);
             assert(!LCA_orig->dominates(pred_block) ||
                    early->dominates(pred_block), "early is high enough");
@@ -1386,7 +1399,7 @@
 #ifdef ASSERT
   for (uint i = 0; i < _num_blocks; i++ ) {
     Block *b = _blocks[i];
-    assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requiers meaningful block frequency");
+    assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
   }
 #endif
 
@@ -1639,7 +1652,7 @@
       // successor blocks.
       assert(_num_succs == 2, "expecting 2 successors of a null check");
       // If either successor has only one predecessor, then the
-      // probabiltity estimate can be derived using the
+      // probability estimate can be derived using the
       // relative frequency of the successor and this block.
       if (_succs[i]->num_preds() == 2) {
         return _succs[i]->_freq / _freq;
@@ -1841,7 +1854,7 @@
 }
 
 //------------------------------update_succ_freq-------------------------------
-// Update the appropriate frequency associated with block 'b', a succesor of
+// Update the appropriate frequency associated with block 'b', a successor of
 // a block in this loop.
 void CFGLoop::update_succ_freq(Block* b, float freq) {
   if (b->_loop == this) {
@@ -1888,7 +1901,8 @@
   for (int i = 0; i < _members.length(); i++) {
     CFGElement* s = _members.at(i);
     float block_freq = s->_freq * loop_freq;
-    if (block_freq < MIN_BLOCK_FREQUENCY) block_freq = MIN_BLOCK_FREQUENCY;
+    if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY)
+      block_freq = MIN_BLOCK_FREQUENCY;
     s->_freq = block_freq;
   }
   CFGLoop* ch = _child;
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1148,7 +1148,7 @@
   Node   *tst = _gvn.transform( btst );
 
   //-----------
-  // if peephole optimizations occured, a prior test existed.
+  // if peephole optimizations occurred, a prior test existed.
   // If a prior test existed, maybe it dominates as we can avoid this test.
   if (tst != btst && type == T_OBJECT) {
     // At this point we want to scan up the CFG to see if we can
@@ -1196,7 +1196,7 @@
   // Consider using 'Reason_class_check' instead?
 
   // To cause an implicit null check, we set the not-null probability
-  // to the maximum (PROB_MAX).  For an explicit check the probablity
+  // to the maximum (PROB_MAX).  For an explicit check the probability
   // is set to a smaller value.
   if (null_control != NULL || too_many_traps(reason)) {
     // probability is less likely
--- a/hotspot/src/share/vm/opto/ifg.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/ifg.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -292,7 +292,7 @@
 //------------------------------interfere_with_live----------------------------
 // Interfere this register with everything currently live.  Use the RegMasks
 // to trim the set of possible interferences. Return a count of register-only
-// inteferences as an estimate of register pressure.
+// interferences as an estimate of register pressure.
 void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
   uint retval = 0;
   // Interfere with everything live.
--- a/hotspot/src/share/vm/opto/ifnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/ifnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -81,7 +81,7 @@
   uint i4;
   for( i4 = 1; i4 < phi->req(); i4++ ) {
     con1 = phi->in(i4);
-    if( !con1 ) return NULL;    // Do not optimize partially collaped merges
+    if( !con1 ) return NULL;    // Do not optimize partially collapsed merges
     if( con1->is_Con() ) break; // Found a constant
     // Also allow null-vs-not-null checks
     const TypePtr *tp = igvn->type(con1)->isa_ptr();
@@ -204,7 +204,7 @@
   //      T  F                              T  F                  T  F
   // ..s..    ..t ..                   ..s..    ..t..        ..s..    ..t..
   //
-  // Split the paths coming into the merge point into 2 seperate groups of
+  // Split the paths coming into the merge point into 2 separate groups of
   // merges.  On the left will be all the paths feeding constants into the
   // Cmp's Phi.  On the right will be the remaining paths.  The Cmp's Phi
   // will fold up into a constant; this will let the Cmp fold up as well as
@@ -236,7 +236,7 @@
   }
 
   // Register the new RegionNodes but do not transform them.  Cannot
-  // transform until the entire Region/Phi conglerate has been hacked
+  // transform until the entire Region/Phi conglomerate has been hacked
   // as a single huge transform.
   igvn->register_new_node_with_optimizer( region_c );
   igvn->register_new_node_with_optimizer( region_x );
@@ -599,7 +599,7 @@
 
 //------------------------------fold_compares----------------------------
 // See if a pair of CmpIs can be converted into a CmpU.  In some cases
-// the direction of this if is determined by the preciding if so it
+// the direction of this if is determined by the preceding if so it
 // can be eliminate entirely.  Given an if testing (CmpI n c) check
 // for an immediately control dependent if that is testing (CmpI n c2)
 // and has one projection leading to this if and the other projection
@@ -811,7 +811,7 @@
     // Try to remove extra range checks.  All 'up_one_dom' gives up at merges
     // so all checks we inspect post-dominate the top-most check we find.
     // If we are going to fail the current check and we reach the top check
-    // then we are guarenteed to fail, so just start interpreting there.
+    // then we are guaranteed to fail, so just start interpreting there.
     // We 'expand' the top 2 range checks to include all post-dominating
     // checks.
 
--- a/hotspot/src/share/vm/opto/library_call.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -992,7 +992,7 @@
   Node *argument = pop();  // pop non-receiver first:  it was pushed second
   Node *receiver = pop();
 
-  // don't intrinsify is argument isn't a constant string.
+  // don't intrinsify if argument isn't a constant string.
   if (!argument->is_Con()) {
     return false;
   }
@@ -1267,7 +1267,7 @@
   //   result = DPow(x,y);
   // }
   // if (result != result)?  {
-  //   ucommon_trap();
+  //   uncommon_trap();
   // }
   // return result;
 
@@ -1324,7 +1324,7 @@
     // Check if (y isn't int) then go to slow path
 
     Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmpinty, BoolTest::ne ) );
-    // Branch eith way
+    // Branch either way
     IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
     Node *slow_path = opt_iff(r,if2); // Set region path 2
 
@@ -1715,8 +1715,8 @@
 }
 
 //----------------------------inline_reverseBytes_int/long-------------------
-// inline Int.reverseBytes(int)
-// inline Long.reverseByes(long)
+// inline Integer.reverseBytes(int)
+// inline Long.reverseBytes(long)
 bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) {
   assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l, "not reverse Bytes");
   if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI)) return false;
@@ -1915,7 +1915,7 @@
     // addition to memory membars when is_volatile. This is a little
     // too strong, but avoids the need to insert per-alias-type
     // volatile membars (for stores; compare Parse::do_put_xxx), which
-    // we cannot do effctively here because we probably only have a
+    // we cannot do effectively here because we probably only have a
     // rough approximation of type.
     need_mem_bar = true;
     // For Stores, place a memory ordering barrier now.
@@ -2099,7 +2099,7 @@
   // overly confusing.  (This is a true fact! I originally combined
   // them, but even I was confused by it!) As much code/comments as
   // possible are retained from inline_unsafe_access though to make
-  // the correspondances clearer. - dl
+  // the correspondences clearer. - dl
 
   if (callee()->is_static())  return false;  // caller must have the capability!
 
@@ -2166,7 +2166,7 @@
   int alias_idx = C->get_alias_index(adr_type);
 
   // Memory-model-wise, a CAS acts like a little synchronized block,
-  // so needs barriers on each side.  These don't't translate into
+  // so needs barriers on each side.  These don't translate into
   // actual barriers on most machines, but we still need rest of
   // compiler to respect ordering.
 
@@ -3208,7 +3208,7 @@
   Node *hash_shift     = _gvn.intcon(markOopDesc::hash_shift);
   Node *hshifted_header= _gvn.transform( new (C, 3) URShiftXNode(header, hash_shift) );
   // This hack lets the hash bits live anywhere in the mark object now, as long
-  // as the shift drops the relevent bits into the low 32 bits.  Note that
+  // as the shift drops the relevant bits into the low 32 bits.  Note that
   // Java spec says that HashCode is an int so there's no point in capturing
   // an 'X'-sized hashcode (32 in 32-bit build or 64 in 64-bit build).
   hshifted_header      = ConvX2I(hshifted_header);
@@ -3255,7 +3255,7 @@
 }
 
 //---------------------------inline_native_getClass----------------------------
-// Build special case code for calls to hashCode on an object.
+// Build special case code for calls to getClass on an object.
 bool LibraryCallKit::inline_native_getClass() {
   Node* obj = null_check_receiver(callee());
   if (stopped())  return true;
@@ -4594,7 +4594,7 @@
   }
 
   // The memory edges above are precise in order to model effects around
-  // array copyies accurately to allow value numbering of field loads around
+  // array copies accurately to allow value numbering of field loads around
   // arraycopy.  Such field loads, both before and after, are common in Java
   // collections and similar classes involving header/array data structures.
   //
--- a/hotspot/src/share/vm/opto/live.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/live.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -39,7 +39,7 @@
 // Leftover bits become the new live-in for the predecessor block, and the pred
 // block is put on the worklist.
 //   The locally live-in stuff is computed once and added to predecessor
-// live-out sets.  This seperate compilation is done in the outer loop below.
+// live-out sets.  This separate compilation is done in the outer loop below.
 PhaseLive::PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena ) : Phase(LIVE), _cfg(cfg), _names(names), _arena(arena), _live(0) {
 }
 
--- a/hotspot/src/share/vm/opto/locknode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/locknode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -121,7 +121,7 @@
   kill_dead_locals();
 
   pop();                        // Pop oop to unlock
-  // Because monitors are guarenteed paired (else we bail out), we know
+  // Because monitors are guaranteed paired (else we bail out), we know
   // the matching Lock for this Unlock.  Hence we know there is no need
   // for a null check on Unlock.
   shared_unlock(map()->peek_monitor_box(), map()->peek_monitor_obj());
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -119,7 +119,7 @@
 
 //---------------------is_invariant_addition-----------------------------
 // Return nonzero index of invariant operand for an Add or Sub
-// of (nonconstant) invariant and variant values. Helper for reassoicate_invariants.
+// of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
   int op = n->Opcode();
   if (op == Op_AddI || op == Op_SubI) {
@@ -520,7 +520,7 @@
 //------------------------------policy_align-----------------------------------
 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 // expression that does the alignment.  Note that only one array base can be
-// aligned in a loop (unless the VM guarentees mutual alignment).  Note that
+// aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 // if we vectorize short memory ops into longer memory ops, we may want to
 // increase alignment.
 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
--- a/hotspot/src/share/vm/opto/loopUnswitch.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -131,7 +131,7 @@
 
   ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();
 
-  // Hoist invariant casts out of each loop to the appropiate
+  // Hoist invariant casts out of each loop to the appropriate
   // control projection.
 
   Node_List worklist;
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -274,7 +274,7 @@
   //
   // Canonicalize the condition on the test.  If we can exactly determine
   // the trip-counter exit value, then set limit to that value and use
-  // a '!=' test.  Otherwise use conditon '<' for count-up loops and
+  // a '!=' test.  Otherwise use condition '<' for count-up loops and
   // '>' for count-down loops.  If the condition is inverted and we will
   // be rolling through MININT to MAXINT, then bail out.
 
@@ -290,7 +290,7 @@
 
   // If compare points to incr, we are ok.  Otherwise the compare
   // can directly point to the phi; in this case adjust the compare so that
-  // it points to the incr by adusting the limit.
+  // it points to the incr by adjusting the limit.
   if( cmp->in(1) == phi || cmp->in(2) == phi )
     limit = gvn->transform(new (C, 3) AddINode(limit,stride));
 
@@ -471,7 +471,7 @@
   lazy_replace( x, l );
   set_idom(l, init_control, dom_depth(x));
 
-  // Check for immediately preceeding SafePoint and remove
+  // Check for immediately preceding SafePoint and remove
   Node *sfpt2 = le->in(0);
   if( sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2))
     lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
@@ -1506,7 +1506,7 @@
 
   // Build Dominators for elision of NULL checks & loop finding.
   // Since nodes do not have a slot for immediate dominator, make
-  // a persistant side array for that info indexed on node->_idx.
+  // a persistent side array for that info indexed on node->_idx.
   _idom_size = C->unique();
   _idom      = NEW_RESOURCE_ARRAY( Node*, _idom_size );
   _dom_depth = NEW_RESOURCE_ARRAY( uint,  _idom_size );
@@ -1529,7 +1529,7 @@
 
   // Given dominators, try to find inner loops with calls that must
   // always be executed (call dominates loop tail).  These loops do
-  // not need a seperate safepoint.
+  // not need a separate safepoint.
   Node_List cisstack(a);
   _ltree_root->check_safepts(visited, cisstack);
 
@@ -2332,7 +2332,7 @@
       if (done) {
         // All of n's inputs have been processed, complete post-processing.
 
-        // Compute earilest point this Node can go.
+        // Compute earliest point this Node can go.
         // CFG, Phi, pinned nodes already know their controlling input.
         if (!has_node(n)) {
           // Record earliest legal location
@@ -2672,9 +2672,9 @@
       pinned = false;
     }
     if( pinned ) {
-      IdealLoopTree *choosen_loop = get_loop(n->is_CFG() ? n : get_ctrl(n));
-      if( !choosen_loop->_child )       // Inner loop?
-        choosen_loop->_body.push(n); // Collect inner loops
+      IdealLoopTree *chosen_loop = get_loop(n->is_CFG() ? n : get_ctrl(n));
+      if( !chosen_loop->_child )       // Inner loop?
+        chosen_loop->_body.push(n); // Collect inner loops
       return;
     }
   } else {                      // No slot zero
@@ -2746,9 +2746,9 @@
   set_ctrl(n, least);
 
   // Collect inner loop bodies
-  IdealLoopTree *choosen_loop = get_loop(least);
-  if( !choosen_loop->_child )   // Inner loop?
-    choosen_loop->_body.push(n);// Collect inner loops
+  IdealLoopTree *chosen_loop = get_loop(least);
+  if( !chosen_loop->_child )   // Inner loop?
+    chosen_loop->_body.push(n);// Collect inner loops
 }
 
 #ifndef PRODUCT
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -390,7 +390,7 @@
 
   // Return TRUE or FALSE if the loop should be cache-line aligned.
   // Gather the expression that does the alignment.  Note that only
-  // one array base can be aligned in a loop (unless the VM guarentees
+  // one array base can be aligned in a loop (unless the VM guarantees
   // mutual alignment).  Note that if we vectorize short memory ops
   // into longer memory ops, we may want to increase alignment.
   bool policy_align( PhaseIdealLoop *phase ) const;
@@ -403,7 +403,7 @@
   // Reassociate invariant add and subtract expressions.
   Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase);
   // Return nonzero index of invariant operand if invariant and variant
-  // are combined with an Add or Sub. Helper for reassoicate_invariants.
+  // are combined with an Add or Sub. Helper for reassociate_invariants.
   int is_invariant_addition(Node* n, PhaseIdealLoop *phase);
 
   // Return true if n is invariant
--- a/hotspot/src/share/vm/opto/loopopts.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -97,7 +97,7 @@
       // (Note: This tweaking with igvn only works because x is a new node.)
       _igvn.set_type(x, t);
       // If x is a TypeNode, capture any more-precise type permanently into Node
-      // othewise it will be not updated during igvn->transform since
+      // otherwise it will be not updated during igvn->transform since
       // igvn->type(x) is set to x->Value() already.
       x->raise_bottom_type(t);
       Node *y = x->Identity(&_igvn);
@@ -879,7 +879,7 @@
             Node *x_ctrl = NULL;
             if( u->is_Phi() ) {
               // Replace all uses of normal nodes.  Replace Phi uses
-              // individually, so the seperate Nodes can sink down
+              // individually, so the separate Nodes can sink down
               // different paths.
               uint k = 1;
               while( u->in(k) != n ) k++;
--- a/hotspot/src/share/vm/opto/machnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/machnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -136,7 +136,7 @@
 // Size of instruction in bytes
 uint MachNode::size(PhaseRegAlloc *ra_) const {
   // If a virtual was not defined for this specific instruction,
-  // Call the helper which finds the size by emiting the bits.
+  // Call the helper which finds the size by emitting the bits.
   return MachNode::emit_size(ra_);
 }
 
--- a/hotspot/src/share/vm/opto/macro.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/macro.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -64,6 +64,7 @@
       uint old_unique = C->unique();
       Node* new_in = old_sosn->clone(jvms_adj, sosn_map);
       if (old_unique != C->unique()) {
+        new_in->set_req(0, newcall->in(0)); // reset control edge
         new_in = transform_later(new_in); // Register new node.
       }
       old_in = new_in;
@@ -215,7 +216,7 @@
   const TypeOopPtr *tinst = phase->C->get_adr_type(alias_idx)->isa_oopptr();
   while (true) {
     if (mem == alloc_mem || mem == start_mem ) {
-      return mem;  // hit one of our sentinals
+      return mem;  // hit one of our sentinels
     } else if (mem->is_MergeMem()) {
       mem = mem->as_MergeMem()->memory_at(alias_idx);
     } else if (mem->is_Proj() && mem->as_Proj()->_con == TypeFunc::Memory) {
@@ -250,6 +251,15 @@
         assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw");
       }
       mem = mem->in(MemNode::Memory);
+    } else if (mem->Opcode() == Op_SCMemProj) {
+      assert(mem->in(0)->is_LoadStore(), "sanity");
+      const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr();
+      int adr_idx = Compile::current()->get_alias_index(atype);
+      if (adr_idx == alias_idx) {
+        assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
+        return NULL;
+      }
+      mem = mem->in(0)->in(MemNode::Memory);
     } else {
       return mem;
     }
@@ -329,8 +339,15 @@
           return NULL;
         }
         values.at_put(j, val);
+      } else if (val->Opcode() == Op_SCMemProj) {
+        assert(val->in(0)->is_LoadStore(), "sanity");
+        assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
+        return NULL;
       } else {
+#ifdef ASSERT
+        val->dump();
         assert(false, "unknown node on this path");
+#endif
         return NULL;  // unknown node on this path
       }
     }
@@ -1651,7 +1668,7 @@
 
   if (UseOptoBiasInlining) {
     /*
-     *  See the full descrition in MacroAssembler::biased_locking_enter().
+     *  See the full description in MacroAssembler::biased_locking_enter().
      *
      *  if( (mark_word & biased_lock_mask) == biased_lock_pattern ) {
      *    // The object is biased.
@@ -1887,7 +1904,7 @@
 
   if (UseOptoBiasInlining) {
     // Check for biased locking unlock case, which is a no-op.
-    // See the full descrition in MacroAssembler::biased_locking_exit().
+    // See the full description in MacroAssembler::biased_locking_exit().
     region  = new (C, 4) RegionNode(4);
     // create a Phi for the memory state
     mem_phi = new (C, 4) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
--- a/hotspot/src/share/vm/opto/matcher.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -897,7 +897,7 @@
 #ifdef ASSERT
           _new2old_map.map(m->_idx, n);
 #endif
-          mstack.push(m, Post_Visit, n, i); // Don't neet to visit
+          mstack.push(m, Post_Visit, n, i); // Don't need to visit
           mstack.push(m->in(0), Visit, m, 0);
         } else {
           mstack.push(m, Visit, n, i);
@@ -1267,7 +1267,7 @@
     }
   }
 
-  // Not forceably cloning.  If shared, put it into a register.
+  // Not forceable cloning.  If shared, put it into a register.
   return shared;
 }
 
@@ -1542,7 +1542,7 @@
   // This is what my child will give me.
   int opnd_class_instance = s->_rule[op];
   // Choose between operand class or not.
-  // This is what I will recieve.
+  // This is what I will receive.
   int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
   // New rule for child.  Chase operand classes to get the actual rule.
   int newrule = s->_rule[catch_op];
@@ -1707,11 +1707,18 @@
 void Matcher::find_shared( Node *n ) {
   // Allocate stack of size C->unique() * 2 to avoid frequent realloc
   MStack mstack(C->unique() * 2);
+  // Mark nodes as address_visited if they are inputs to an address expression
+  VectorSet address_visited(Thread::current()->resource_area());
   mstack.push(n, Visit);     // Don't need to pre-visit root node
   while (mstack.is_nonempty()) {
     n = mstack.node();       // Leave node on stack
     Node_State nstate = mstack.state();
+    uint nop = n->Opcode();
     if (nstate == Pre_Visit) {
+      if (address_visited.test(n->_idx)) { // Visited in address already?
+        // Flag as visited and shared now.
+        set_visited(n);
+      }
       if (is_visited(n)) {   // Visited already?
         // Node is shared and has no reason to clone.  Flag it as shared.
         // This causes it to match into a register for the sharing.
@@ -1726,7 +1733,7 @@
       set_visited(n);   // Flag as visited now
       bool mem_op = false;
 
-      switch( n->Opcode() ) {  // Handle some opcodes special
+      switch( nop ) {  // Handle some opcodes special
       case Op_Phi:             // Treat Phis as shared roots
       case Op_Parm:
       case Op_Proj:            // All handled specially during matching
@@ -1887,34 +1894,51 @@
             // to have a single use so force sharing here.
             set_shared(m->in(AddPNode::Base)->in(1));
           }
+
+          // Some inputs for address expression are not put on stack
+          // to avoid marking them as shared and forcing them into register
+          // if they are used only in address expressions.
+          // But they should be marked as shared if there are other uses
+          // besides address expressions.
+
           Node *off = m->in(AddPNode::Offset);
-          if( off->is_Con() ) {
-            set_visited(m);  // Flag as visited now
+          if( off->is_Con() &&
+              // When there are other uses besides address expressions
+              // put it on stack and mark as shared.
+              !is_visited(m) ) {
+            address_visited.test_set(m->_idx); // Flag as address_visited
             Node *adr = m->in(AddPNode::Address);
 
             // Intel, ARM and friends can handle 2 adds in addressing mode
             if( clone_shift_expressions && adr->is_AddP() &&
                 // AtomicAdd is not an addressing expression.
                 // Cheap to find it by looking for screwy base.
-                !adr->in(AddPNode::Base)->is_top() ) {
-              set_visited(adr);  // Flag as visited now
+                !adr->in(AddPNode::Base)->is_top() &&
+                // Are there other uses besides address expressions?
+                !is_visited(adr) ) {
+              address_visited.set(adr->_idx); // Flag as address_visited
               Node *shift = adr->in(AddPNode::Offset);
               // Check for shift by small constant as well
               if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
-                  shift->in(2)->get_int() <= 3 ) {
-                set_visited(shift);  // Flag as visited now
+                  shift->in(2)->get_int() <= 3 &&
+                  // Are there other uses besides address expressions?
+                  !is_visited(shift) ) {
+                address_visited.set(shift->_idx); // Flag as address_visited
                 mstack.push(shift->in(2), Visit);
+                Node *conv = shift->in(1);
 #ifdef _LP64
                 // Allow Matcher to match the rule which bypass
                 // ConvI2L operation for an array index on LP64
                 // if the index value is positive.
-                if( shift->in(1)->Opcode() == Op_ConvI2L &&
-                    shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) {
-                  set_visited(shift->in(1));  // Flag as visited now
-                  mstack.push(shift->in(1)->in(1), Pre_Visit);
+                if( conv->Opcode() == Op_ConvI2L &&
+                    conv->as_Type()->type()->is_long()->_lo >= 0 &&
+                    // Are there other uses besides address expressions?
+                    !is_visited(conv) ) {
+                  address_visited.set(conv->_idx); // Flag as address_visited
+                  mstack.push(conv->in(1), Pre_Visit);
                 } else
 #endif
-                mstack.push(shift->in(1), Pre_Visit);
+                mstack.push(conv, Pre_Visit);
               } else {
                 mstack.push(shift, Pre_Visit);
               }
@@ -1942,7 +1966,7 @@
       // BoolNode::match_edge always returns a zero.
 
       // We reorder the Op_If in a pre-order manner, so we can visit without
-      // accidently sharing the Cmp (the Bool and the If make 2 users).
+      // accidentally sharing the Cmp (the Bool and the If make 2 users).
       n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
     }
     else if (nstate == Post_Visit) {
--- a/hotspot/src/share/vm/opto/memnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -100,12 +100,12 @@
   while (prev != result) {
     prev = result;
     if (result == start_mem)
-      break;  // hit one of our sentinals
+      break;  // hit one of our sentinels
     // skip over a call which does not affect this memory slice
     if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) {
       Node *proj_in = result->in(0);
       if (proj_in->is_Allocate() && proj_in->_idx == instance_id) {
-        break;  // hit one of our sentinals
+        break;  // hit one of our sentinels
       } else if (proj_in->is_Call()) {
         CallNode *call = proj_in->as_Call();
         if (!call->may_modify(t_adr, phase)) {
@@ -198,7 +198,7 @@
     // If not, we can update the input infinitely along a MergeMem cycle
     // Equivalent code in PhiNode::Ideal
     Node* m  = phase->transform(mmem);
-    // If tranformed to a MergeMem, get the desired slice
+    // If transformed to a MergeMem, get the desired slice
     // Otherwise the returned node represents memory for every slice
     mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m;
     // Update input if it is progress over what we have now
@@ -778,7 +778,7 @@
            adr_type->offset() == arrayOopDesc::length_offset_in_bytes()),
          "use LoadRangeNode instead");
   switch (bt) {
-  case T_BOOLEAN:
+  case T_BOOLEAN: return new (C, 3) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int()    );
   case T_BYTE:    return new (C, 3) LoadBNode (ctl, mem, adr, adr_type, rt->is_int()    );
   case T_INT:     return new (C, 3) LoadINode (ctl, mem, adr, adr_type, rt->is_int()    );
   case T_CHAR:    return new (C, 3) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int()    );
@@ -970,7 +970,7 @@
   }
 
   // Search for an existing data phi which was generated before for the same
-  // instance's field to avoid infinite genertion of phis in a loop.
+  // instance's field to avoid infinite generation of phis in a loop.
   Node *region = mem->in(0);
   if (is_instance_field_load_with_local_phi(region)) {
     const TypePtr *addr_t = in(MemNode::Address)->bottom_type()->isa_ptr();
@@ -1066,11 +1066,11 @@
         break;
       }
     }
-    LoadNode* load = NULL;
-    if (allocation != NULL && base->in(load_index)->is_Load()) {
-      load = base->in(load_index)->as_Load();
-    }
-    if (load != NULL && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) {
+    bool has_load = ( allocation != NULL &&
+                      (base->in(load_index)->is_Load() ||
+                       base->in(load_index)->is_DecodeN() &&
+                       base->in(load_index)->in(1)->is_Load()) );
+    if (has_load && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) {
       // Push the loads from the phi that comes from valueOf up
       // through it to allow elimination of the loads and the recovery
       // of the original value.
@@ -1106,11 +1106,20 @@
       result->set_req(load_index, in2);
       return result;
     }
-  } else if (base->is_Load()) {
+  } else if (base->is_Load() ||
+             base->is_DecodeN() && base->in(1)->is_Load()) {
+    if (base->is_DecodeN()) {
+      // Get LoadN node which loads cached Integer object
+      base = base->in(1);
+    }
     // Eliminate the load of Integer.value for integers from the cache
     // array by deriving the value from the index into the array.
     // Capture the offset of the load and then reverse the computation.
     Node* load_base = base->in(Address)->in(AddPNode::Base);
+    if (load_base->is_DecodeN()) {
+      // Get LoadN node which loads IntegerCache.cache field
+      load_base = load_base->in(1);
+    }
     if (load_base != NULL) {
       Compile::AliasType* atp = phase->C->alias_type(load_base->adr_type());
       intptr_t cache_offset;
@@ -1245,7 +1254,7 @@
       // (This tweaking with igvn only works because x is a new node.)
       igvn->set_type(x, t);
       // If x is a TypeNode, capture any more-precise type permanently into Node
-      // othewise it will be not updated during igvn->transform since
+      // otherwise it will be not updated during igvn->transform since
       // igvn->type(x) is set to x->Value() already.
       x->raise_bottom_type(t);
       Node *y = x->Identity(igvn);
@@ -1607,6 +1616,22 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+//--------------------------LoadUBNode::Ideal-------------------------------------
+//
+//  If the previous store is to the same address as this load,
+//  and the value stored was larger than a byte, replace this load
+//  with the value stored truncated to a byte.  If no truncation is
+//  needed, the replacement is done in LoadNode::Identity().
+//
+Node* LoadUBNode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem, phase);
+  if (value && !phase->type(value)->higher_equal(_type))
+    return new (phase->C, 3) AndINode(value, phase->intcon(0xFF));
+  // Identity call will handle the case where truncation is not needed.
+  return LoadNode::Ideal(phase, can_reshape);
+}
+
 //--------------------------LoadUSNode::Ideal-------------------------------------
 //
 //  If the previous store is to the same address as this load,
@@ -2582,7 +2607,7 @@
 // capturing of nearby memory operations.
 //
 // During macro-expansion, all captured initializations which store
-// constant values of 32 bits or smaller are coalesced (if advantagous)
+// constant values of 32 bits or smaller are coalesced (if advantageous)
 // into larger 'tiles' 32 or 64 bits.  This allows an object to be
 // initialized in fewer memory operations.  Memory words which are
 // covered by neither tiles nor non-constant stores are pre-zeroed
@@ -3669,7 +3694,7 @@
     else if (old_mmem != NULL) {
       new_mem = old_mmem->memory_at(i);
     }
-    // else preceeding memory was not a MergeMem
+    // else preceding memory was not a MergeMem
 
     // replace equivalent phis (unfortunately, they do not GVN together)
     if (new_mem != NULL && new_mem != new_base &&
--- a/hotspot/src/share/vm/opto/memnode.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -207,6 +207,19 @@
   virtual BasicType memory_type() const { return T_BYTE; }
 };
 
+//------------------------------LoadUBNode-------------------------------------
+// Load a unsigned byte (8bits unsigned) from memory
+class LoadUBNode : public LoadNode {
+public:
+  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti = TypeInt::UBYTE )
+    : LoadNode(c, mem, adr, at, ti) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Op_RegI; }
+  virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual int store_Opcode() const { return Op_StoreB; }
+  virtual BasicType memory_type() const { return T_BYTE; }
+};
+
 //------------------------------LoadUSNode-------------------------------------
 // Load an unsigned short/char (16bits unsigned) from memory
 class LoadUSNode : public LoadNode {
@@ -232,6 +245,18 @@
   virtual BasicType memory_type() const { return T_INT; }
 };
 
+//------------------------------LoadUI2LNode-----------------------------------
+// Load an unsigned integer into long from memory
+class LoadUI2LNode : public LoadNode {
+public:
+  LoadUI2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong* t = TypeLong::UINT)
+    : LoadNode(c, mem, adr, at, t) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Op_RegL; }
+  virtual int store_Opcode() const { return Op_StoreL; }
+  virtual BasicType memory_type() const { return T_LONG; }
+};
+
 //------------------------------LoadRangeNode----------------------------------
 // Load an array length from the array
 class LoadRangeNode : public LoadINode {
@@ -757,10 +782,10 @@
 // Model.  Monitor-enter and volatile-load act as Aquires: no following ref
 // can be moved to before them.  We insert a MemBar-Acquire after a FastLock or
 // volatile-load.  Monitor-exit and volatile-store act as Release: no
-// preceeding ref can be moved to after them.  We insert a MemBar-Release
+// preceding ref can be moved to after them.  We insert a MemBar-Release
 // before a FastUnlock or volatile-store.  All volatiles need to be
 // serialized, so we follow all volatile-stores with a MemBar-Volatile to
-// seperate it from any following volatile-load.
+// separate it from any following volatile-load.
 class MemBarNode: public MultiNode {
   virtual uint hash() const ;                  // { return NO_HASH; }
   virtual uint cmp( const Node &n ) const ;    // Always fail, except on self
--- a/hotspot/src/share/vm/opto/mulnode.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/mulnode.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -486,20 +486,23 @@
     return new (phase->C, 3) AndINode(ldus, phase->intcon(mask&0xFFFF));
   }
 
-  // Masking sign bits off of a Byte?  Let the matcher use an unsigned load
-  if( lop == Op_LoadB &&
-      (!in(0) && load->in(0)) &&
-      (mask == 0x000000FF) ) {
-    // Associate this node with the LoadB, so the matcher can see them together.
-    // If we don't do this, it is common for the LoadB to have one control
-    // edge, and the store or call containing this AndI to have a different
-    // control edge.  This will cause Label_Root to group the AndI with
-    // the encoding store or call, so the matcher has no chance to match
-    // this AndI together with the LoadB.  Setting the control edge here
-    // prevents Label_Root from grouping the AndI with the store or call,
-    // if it has a control edge that is inconsistent with the LoadB.
-    set_req(0, load->in(0));
-    return this;
+  // Masking sign bits off of a Byte?  Do an unsigned byte load.
+  if (lop == Op_LoadB && mask == 0x000000FF) {
+    return new (phase->C, 3) LoadUBNode(load->in(MemNode::Control),
+                                        load->in(MemNode::Memory),
+                                        load->in(MemNode::Address),
+                                        load->adr_type());
+  }
+
+  // Masking sign bits off of a Byte plus additional lower bits?  Do
+  // an unsigned byte load plus an and.
+  if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
+    Node* ldub = new (phase->C, 3) LoadUBNode(load->in(MemNode::Control),
+                                              load->in(MemNode::Memory),
+                                              load->in(MemNode::Address),
+                                              load->adr_type());
+    ldub = phase->transform(ldub);
+    return new (phase->C, 3) AndINode(ldub, phase->intcon(mask));
   }
 
   // Masking off sign bits?  Dont make them!
@@ -599,12 +602,21 @@
   if( !t2 || !t2->is_con() ) return MulNode::Ideal(phase, can_reshape);
   const jlong mask = t2->get_con();
 
-  Node *rsh = in(1);
-  uint rop = rsh->Opcode();
+  Node* in1 = in(1);
+  uint op = in1->Opcode();
+
+  // Masking sign bits off of an integer?  Do an unsigned integer to long load.
+  if (op == Op_ConvI2L && in1->in(1)->Opcode() == Op_LoadI && mask == 0x00000000FFFFFFFFL) {
+    Node* load = in1->in(1);
+    return new (phase->C, 3) LoadUI2LNode(load->in(MemNode::Control),
+                                          load->in(MemNode::Memory),
+                                          load->in(MemNode::Address),
+                                          load->adr_type());
+  }
 
   // Masking off sign bits?  Dont make them!
-  if( rop == Op_RShiftL ) {
-    const TypeInt *t12 = phase->type(rsh->in(2))->isa_int();
+  if (op == Op_RShiftL) {
+    const TypeInt *t12 = phase->type(in1->in(2))->isa_int();
     if( t12 && t12->is_con() ) { // Shift is by a constant
       int shift = t12->get_con();
       shift &= BitsPerJavaLong - 1;  // semantics of Java shifts
@@ -613,7 +625,7 @@
       // bits survive.  NO sign-extension bits survive the maskings.
       if( (sign_bits_mask & mask) == 0 ) {
         // Use zero-fill shift instead
-        Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(rsh->in(1),rsh->in(2)));
+        Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(in1->in(1), in1->in(2)));
         return new (phase->C, 3) AndLNode( zshift, in(2) );
       }
     }
--- a/hotspot/src/share/vm/opto/node.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/node.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -968,22 +968,23 @@
 // Example: when reshape "(X+3)+4" into "X+7" you must leave the Node for
 // "X+3" unchanged in case it is shared.
 //
-// If you modify the 'this' pointer's inputs, you must use 'set_req' with
-// def-use info.  If you are making a new Node (either as the new root or
-// some new internal piece) you must NOT use set_req with def-use info.
-// You can make a new Node with either 'new' or 'clone'.  In either case,
-// def-use info is (correctly) not generated.
+// If you modify the 'this' pointer's inputs, you should use
+// 'set_req'.  If you are making a new Node (either as the new root or
+// some new internal piece) you may use 'init_req' to set the initial
+// value.  You can make a new Node with either 'new' or 'clone'.  In
+// either case, def-use info is correctly maintained.
+//
 // Example: reshape "(X+3)+4" into "X+7":
-//    set_req(1,in(1)->in(1) /* grab X */, du /* must use DU on 'this' */);
-//    set_req(2,phase->intcon(7),du);
+//    set_req(1, in(1)->in(1));
+//    set_req(2, phase->intcon(7));
 //    return this;
-// Example: reshape "X*4" into "X<<1"
-//    return new (C,3) LShiftINode( in(1), phase->intcon(1) );
+// Example: reshape "X*4" into "X<<2"
+//    return new (C,3) LShiftINode(in(1), phase->intcon(2));
 //
 // You must call 'phase->transform(X)' on any new Nodes X you make, except
-// for the returned root node.  Example: reshape "X*31" with "(X<<5)-1".
+// for the returned root node.  Example: reshape "X*31" with "(X<<5)-X".
 //    Node *shift=phase->transform(new(C,3)LShiftINode(in(1),phase->intcon(5)));
-//    return new (C,3) AddINode(shift, phase->intcon(-1));
+//    return new (C,3) AddINode(shift, in(1));
 //
 // When making a Node for a constant use 'phase->makecon' or 'phase->intcon'.
 // These forms are faster than 'phase->transform(new (C,1) ConNode())' and Do
@@ -1679,7 +1680,7 @@
   if (visited.member(this))  return;
   visited.push(this);
 
-  // Walk over all input edges, checking for correspondance
+  // Walk over all input edges, checking for correspondence
   for( i = 0; i < len(); i++ ) {
     n = in(i);
     if (n != NULL && !n->is_top()) {
@@ -1723,7 +1724,7 @@
   // Contained in new_space or old_space?
   VectorSet *v = C->node_arena()->contains(n) ? &new_space : &old_space;
   // Check for visited in the proper space.  Numberings are not unique
-  // across spaces so we need a seperate VectorSet for each space.
+  // across spaces so we need a separate VectorSet for each space.
   if( v->test_set(n->_idx) ) return;
 
   if (n->is_Con() && n->bottom_type() == Type::TOP) {
--- a/hotspot/src/share/vm/opto/node.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/node.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -257,7 +257,7 @@
   Node **_in;                   // Array of use-def references to Nodes
   Node **_out;                  // Array of def-use references to Nodes
 
-  // Input edges are split into two catagories.  Required edges are required
+  // Input edges are split into two categories.  Required edges are required
   // for semantic correctness; order is important and NULLs are allowed.
   // Precedence edges are used to help determine execution order and are
   // added, e.g., for scheduling purposes.  They are unordered and not
@@ -854,7 +854,7 @@
 
   // If the hash function returns the special sentinel value NO_HASH,
   // the node is guaranteed never to compare equal to any other node.
-  // If we accidently generate a hash with value NO_HASH the node
+  // If we accidentally generate a hash with value NO_HASH the node
   // won't go into the table and we'll lose a little optimization.
   enum { NO_HASH = 0 };
   virtual uint hash() const;
--- a/hotspot/src/share/vm/opto/output.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/output.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1171,7 +1171,7 @@
         cb->flush_bundle(false);
 
       // The following logic is duplicated in the code ifdeffed for
-      // ENABLE_ZAP_DEAD_LOCALS which apppears above in this file.  It
+      // ENABLE_ZAP_DEAD_LOCALS which appears above in this file.  It
       // should be factored out.  Or maybe dispersed to the nodes?
 
       // Special handling for SafePoint/Call Nodes
@@ -1275,7 +1275,7 @@
         }
 
 #ifdef ASSERT
-        // Check that oop-store preceeds the card-mark
+        // Check that oop-store precedes the card-mark
         else if( mach->ideal_Opcode() == Op_StoreCM ) {
           uint storeCM_idx = j;
           Node *oop_store = mach->in(mach->_cnt);  // First precedence edge
@@ -1291,7 +1291,7 @@
 #endif
 
         else if( !n->is_Proj() ) {
-          // Remember the begining of the previous instruction, in case
+          // Remember the beginning of the previous instruction, in case
           // it's followed by a flag-kill and a null-check.  Happens on
           // Intel all the time, with add-to-memory kind of opcodes.
           previous_offset = current_offset;
@@ -1567,7 +1567,7 @@
 
   compile.set_node_bundling_limit(_node_bundling_limit);
 
-  // This one is persistant within the Compile class
+  // This one is persistent within the Compile class
   _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);
 
   // Allocate space for fixed-size arrays
@@ -1666,7 +1666,7 @@
 // Compute the latency of all the instructions.  This is fairly simple,
 // because we already have a legal ordering.  Walk over the instructions
 // from first to last, and compute the latency of the instruction based
-// on the latency of the preceeding instruction(s).
+// on the latency of the preceding instruction(s).
 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
 #ifndef PRODUCT
   if (_cfg->C->trace_opto_output())
@@ -1931,7 +1931,7 @@
     uint siz = _available.size();
 
     // Conditional branches can support an instruction that
-    // is unconditionally executed and not dependant by the
+    // is unconditionally executed and not dependent by the
     // branch, OR a conditionally executed instruction if
     // the branch is taken.  In practice, this means that
     // the first instruction at the branch target is
@@ -1947,7 +1947,7 @@
 #endif
 
       // At least 1 instruction is on the available list
-      // that is not dependant on the branch
+      // that is not dependent on the branch
       for (uint i = 0; i < siz; i++) {
         Node *d = _available[i];
         const Pipeline *avail_pipeline = d->pipeline();
--- a/hotspot/src/share/vm/opto/parse.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/parse.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -78,7 +78,7 @@
   };
 
   // See if it is OK to inline.
-  // The reciever is the inline tree for the caller.
+  // The receiver is the inline tree for the caller.
   //
   // The result is a temperature indication.  If it is hot or cold,
   // inlining is immediate or undesirable.  Otherwise, the info block
--- a/hotspot/src/share/vm/opto/parse1.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -607,7 +607,7 @@
       if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) {
         // In the absence of irreducible loops, the Region and Phis
         // associated with a merge that doesn't involve a backedge can
-        // be simplfied now since the RPO parsing order guarantees
+        // be simplified now since the RPO parsing order guarantees
         // that any path which was supposed to reach here has already
         // been parsed or must be dead.
         Node* c = control();
--- a/hotspot/src/share/vm/opto/parse2.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/parse2.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -32,7 +32,7 @@
 void Parse::array_load(BasicType elem_type) {
   const Type* elem = Type::TOP;
   Node* adr = array_addressing(elem_type, 0, &elem);
-  if (stopped())  return;     // guarenteed null or range check
+  if (stopped())  return;     // guaranteed null or range check
   _sp -= 2;                   // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
   Node* ld = make_load(control(), adr, elem, elem_type, adr_type);
@@ -43,7 +43,7 @@
 //--------------------------------array_store----------------------------------
 void Parse::array_store(BasicType elem_type) {
   Node* adr = array_addressing(elem_type, 1);
-  if (stopped())  return;     // guarenteed null or range check
+  if (stopped())  return;     // guaranteed null or range check
   Node* val = pop();
   _sp -= 2;                   // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
@@ -1541,14 +1541,14 @@
   case Bytecodes::_aaload: array_load(T_OBJECT); break;
   case Bytecodes::_laload: {
     a = array_addressing(T_LONG, 0);
-    if (stopped())  return;     // guarenteed null or range check
+    if (stopped())  return;     // guaranteed null or range check
     _sp -= 2;                   // Pop array and index
     push_pair( make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS));
     break;
   }
   case Bytecodes::_daload: {
     a = array_addressing(T_DOUBLE, 0);
-    if (stopped())  return;     // guarenteed null or range check
+    if (stopped())  return;     // guaranteed null or range check
     _sp -= 2;                   // Pop array and index
     push_pair( make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES));
     break;
@@ -1560,7 +1560,7 @@
   case Bytecodes::_fastore: array_store(T_FLOAT); break;
   case Bytecodes::_aastore: {
     d = array_addressing(T_OBJECT, 1);
-    if (stopped())  return;     // guarenteed null or range check
+    if (stopped())  return;     // guaranteed null or range check
     array_store_check();
     c = pop();                  // Oop to store
     b = pop();                  // index (already used)
@@ -1572,7 +1572,7 @@
   }
   case Bytecodes::_lastore: {
     a = array_addressing(T_LONG, 2);
-    if (stopped())  return;     // guarenteed null or range check
+    if (stopped())  return;     // guaranteed null or range check
     c = pop_pair();
     _sp -= 2;                   // Pop array and index
     store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS);
@@ -1580,7 +1580,7 @@
   }
   case Bytecodes::_dastore: {
     a = array_addressing(T_DOUBLE, 2);
-    if (stopped())  return;     // guarenteed null or range check
+    if (stopped())  return;     // guaranteed null or range check
     c = pop_pair();
     _sp -= 2;                   // Pop array and index
     c = dstore_rounding(c);
--- a/hotspot/src/share/vm/opto/phase.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/phase.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -73,7 +73,7 @@
 
 //------------------------------Phase------------------------------------------
 Phase::Phase( PhaseNumber pnum ) : _pnum(pnum), C( pnum == Compiler ? NULL : Compile::current()) {
-  // Poll for requests from shutdown mechanism to quiesce comiler (4448539, 4448544).
+  // Poll for requests from shutdown mechanism to quiesce compiler (4448539, 4448544).
   // This is an effective place to poll, since the compiler is full of phases.
   // In particular, every inlining site uses a recursively created Parse phase.
   CompileBroker::maybe_block();
--- a/hotspot/src/share/vm/opto/phaseX.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/phaseX.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -196,7 +196,7 @@
 }
 
 //------------------------------hash_delete------------------------------------
-// Replace in hash table with sentinal
+// Replace in hash table with sentinel
 bool NodeHash::hash_delete( const Node *n ) {
   Node *k;
   uint hash = n->hash();
@@ -207,7 +207,7 @@
   uint key = hash & (_max-1);
   uint stride = key | 0x01;
   debug_only( uint counter = 0; );
-  for( ; /* (k != NULL) && (k != _sentinal) */; ) {
+  for( ; /* (k != NULL) && (k != _sentinel) */; ) {
     debug_only( counter++ );
     debug_only( _delete_probes++ );
     k = _table[key];            // Get hashed value
@@ -715,7 +715,7 @@
 
 #ifdef ASSERT
 //------------------------------dead_loop_check--------------------------------
-// Check for a simple dead loop when a data node references itself direcly
+// Check for a simple dead loop when a data node references itself directly
 // or through an other data node excluding cons and phis.
 void PhaseGVN::dead_loop_check( Node *n ) {
   // Phi may reference itself in a loop
@@ -1359,7 +1359,7 @@
               worklist.push(p); // Propagate change to user
           }
         }
-        // If we changed the reciever type to a call, we need to revisit
+        // If we changed the receiver type to a call, we need to revisit
         // the Catch following the call.  It's looking for a non-NULL
         // receiver to know when to enable the regular fall-through path
         // in addition to the NullPtrException path
--- a/hotspot/src/share/vm/opto/postaloc.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/postaloc.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -46,7 +46,7 @@
   // be splitting live ranges for callee save registers to such
   // an extent that in large methods the chains can be very long
   // (50+). The conservative answer is to return true if we don't
-  // know as this prevents optimizations from occuring.
+  // know as this prevents optimizations from occurring.
 
   const int limit = 60;
   int i;
@@ -286,7 +286,7 @@
     //
     // n will be replaced with the old value but n might have
     // kills projections associated with it so remove them now so that
-    // yank_if_dead will be able to elminate the copy once the uses
+    // yank_if_dead will be able to eliminate the copy once the uses
     // have been transferred to the old[value].
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* use = n->fast_out(i);
--- a/hotspot/src/share/vm/opto/reg_split.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/reg_split.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -26,8 +26,8 @@
 #include "incls/_reg_split.cpp.incl"
 
 //------------------------------Split--------------------------------------
-// Walk the graph in RPO and for each lrg which spills, propogate reaching
-// definitions.  During propogation, split the live range around regions of
+// Walk the graph in RPO and for each lrg which spills, propagate reaching
+// definitions.  During propagation, split the live range around regions of
 // High Register Pressure (HRP).  If a Def is in a region of Low Register
 // Pressure (LRP), it will not get spilled until we encounter a region of
 // HRP between it and one of its uses.  We will spill at the transition
@@ -88,7 +88,7 @@
 }
 
 //------------------------------insert_proj------------------------------------
-// Insert the spill at chosen location.  Skip over any interveneing Proj's or
+// Insert the spill at chosen location.  Skip over any intervening Proj's or
 // Phis.  Skip over a CatchNode and projs, inserting in the fall-through block
 // instead.  Update high-pressure indices.  Create a new live range.
 void PhaseChaitin::insert_proj( Block *b, uint i, Node *spill, uint maxlrg ) {
@@ -125,7 +125,7 @@
 }
 
 //------------------------------split_DEF--------------------------------------
-// There are four catagories of Split; UP/DOWN x DEF/USE
+// There are four categories of Split; UP/DOWN x DEF/USE
 // Only three of these really occur as DOWN/USE will always color
 // Any Split with a DEF cannot CISC-Spill now.  Thus we need
 // two helper routines, one for Split DEFS (insert after instruction),
@@ -726,7 +726,7 @@
       // ********** Handle Crossing HRP Boundry **********
       if( (insidx == b->_ihrp_index) || (insidx == b->_fhrp_index) ) {
         for( slidx = 0; slidx < spill_cnt; slidx++ ) {
-          // Check for need to split at HRP boundry - split if UP
+          // Check for need to split at HRP boundary - split if UP
           n1 = Reachblock[slidx];
           // bail out if no reaching DEF
           if( n1 == NULL ) continue;
--- a/hotspot/src/share/vm/opto/runtime.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1196,7 +1196,7 @@
 
 // The following does not work because for one thing, the
 // thread state is wrong; it expects java, but it is native.
-// Also, the invarients in a native stub are different and
+// Also, the invariants in a native stub are different and
 // I'm not sure it is safe to have a MachCalRuntimeDirectNode
 // in there.
 // So for now, we do not zap in native stubs.
--- a/hotspot/src/share/vm/opto/split_if.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/split_if.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -318,7 +318,7 @@
 
   if( use->is_Phi() ) {         // Phi uses in prior block
     // Grab the first Phi use; there may be many.
-    // Each will be handled as a seperate iteration of
+    // Each will be handled as a separate iteration of
     // the "while( phi->outcnt() )" loop.
     uint j;
     for( j = 1; j < use->req(); j++ )
--- a/hotspot/src/share/vm/opto/superword.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/superword.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -470,7 +470,7 @@
 }
 
 //------------------------------stmts_can_pack---------------------------
-// Can s1 and s2 be in a pack with s1 immediately preceeding s2 and
+// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
 // s1 aligned at "align"
 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
   if (isomorphic(s1, s2)) {
@@ -869,7 +869,7 @@
   for (uint i = start; i < end; i++) {
     if (!is_vector_use(p0, i)) {
       // For now, return false if not scalar promotion case (inputs are the same.)
-      // Later, implement PackNode and allow differring, non-vector inputs
+      // Later, implement PackNode and allow differing, non-vector inputs
       // (maybe just the ones from outside the block.)
       Node* p0_def = p0->in(i);
       for (uint j = 1; j < p->size(); j++) {
--- a/hotspot/src/share/vm/opto/superword.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/superword.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -308,7 +308,7 @@
   void dependence_graph();
   // Return a memory slice (node list) in predecessor order starting at "start"
   void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds);
-  // Can s1 and s2 be in a pack with s1 immediately preceeding s2 and  s1 aligned at "align"
+  // Can s1 and s2 be in a pack with s1 immediately preceding s2 and  s1 aligned at "align"
   bool stmts_can_pack(Node* s1, Node* s2, int align);
   // Does s exist in a pack at position pos?
   bool exists_at(Node* s, uint pos);
--- a/hotspot/src/share/vm/opto/type.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/type.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -226,6 +226,7 @@
   TypeInt::CC_LE   = TypeInt::make(-1, 0, WidenMin);
   TypeInt::CC_GE   = TypeInt::make( 0, 1, WidenMin);  // == TypeInt::BOOL
   TypeInt::BYTE    = TypeInt::make(-128,127,     WidenMin); // Bytes
+  TypeInt::UBYTE   = TypeInt::make(0, 255,       WidenMin); // Unsigned Bytes
   TypeInt::CHAR    = TypeInt::make(0,65535,      WidenMin); // Java chars
   TypeInt::SHORT   = TypeInt::make(-32768,32767, WidenMin); // Java shorts
   TypeInt::POS     = TypeInt::make(0,max_jint,   WidenMin); // Non-neg values
@@ -1022,6 +1023,7 @@
 const TypeInt *TypeInt::CC_LE;  // [-1,0]
 const TypeInt *TypeInt::CC_GE;  // [0,1] == BOOL (!)
 const TypeInt *TypeInt::BYTE;   // Bytes, -128 to 127
+const TypeInt *TypeInt::UBYTE;  // Unsigned Bytes, 0 to 255
 const TypeInt *TypeInt::CHAR;   // Java chars, 0-65535
 const TypeInt *TypeInt::SHORT;  // Java shorts, -32768-32767
 const TypeInt *TypeInt::POS;    // Positive 32-bit integers or zero
@@ -2455,7 +2457,7 @@
     // code and dereferenced at the time the nmethod is made.  Until that time,
     // it is not reasonable to do arithmetic with the addresses of oops (we don't
     // have access to the addresses!).  This does not seem to currently happen,
-    // but this assertion here is to help prevent its occurrance.
+    // but this assertion here is to help prevent its occurence.
     tty->print_cr("Found oop constant with non-zero offset");
     ShouldNotReachHere();
   }
@@ -2761,7 +2763,7 @@
       // LCA is object_klass, but if we subclass from the top we can do better
       if( above_centerline(_ptr) ) { // if( _ptr == TopPTR || _ptr == AnyNull )
         // If 'this' (InstPtr) is above the centerline and it is Object class
-        // then we can subclass in the Java class heirarchy.
+        // then we can subclass in the Java class hierarchy.
         if (klass()->equals(ciEnv::current()->Object_klass())) {
           // that is, tp's array type is a subtype of my klass
           return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id);
@@ -3022,7 +3024,7 @@
 
 //------------------------------xdual------------------------------------------
 // Dual: do NOT dual on klasses.  This means I do NOT understand the Java
-// inheritence mechanism.
+// inheritance mechanism.
 const Type *TypeInstPtr::xdual() const {
   return new TypeInstPtr( dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id()  );
 }
@@ -3176,7 +3178,7 @@
   bool chg = false;
   if (lo < min_lo) { lo = min_lo; chg = true; }
   if (hi > max_hi) { hi = max_hi; chg = true; }
-  // Negative length arrays will produce weird intermediate dead fath-path code
+  // Negative length arrays will produce weird intermediate dead fast-path code
   if (lo > hi)
     return TypeInt::ZERO;
   if (!chg)
@@ -3358,7 +3360,7 @@
       // LCA is object_klass, but if we subclass from the top we can do better
       if (above_centerline(tp->ptr())) {
         // If 'tp'  is above the centerline and it is Object class
-        // then we can subclass in the Java class heirarchy.
+        // then we can subclass in the Java class hierarchy.
         if( tp->klass()->equals(ciEnv::current()->Object_klass()) ) {
           // that is, my array type is a subtype of 'tp' klass
           return make( ptr, _ary, _klass, _klass_is_exact, offset, instance_id );
--- a/hotspot/src/share/vm/opto/type.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/opto/type.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -415,6 +415,7 @@
   static const TypeInt *CC_LE;  // [-1,0]
   static const TypeInt *CC_GE;  // [0,1] == BOOL (!)
   static const TypeInt *BYTE;
+  static const TypeInt *UBYTE;
   static const TypeInt *CHAR;
   static const TypeInt *SHORT;
   static const TypeInt *POS;
--- a/hotspot/src/share/vm/prims/jvmtiEnv.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiEnv.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -99,6 +99,9 @@
     }
     // otherwise, create the state
     state = JvmtiThreadState::state_for(java_thread);
+    if (state == NULL) {
+      return JVMTI_ERROR_THREAD_NOT_ALIVE;
+    }
   }
   state->env_thread_state(this)->set_agent_thread_local_storage_data((void*)data);
   return JVMTI_ERROR_NONE;
@@ -1308,6 +1311,9 @@
 
   // retrieve or create JvmtiThreadState.
   JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread);
+  if (state == NULL) {
+    return JVMTI_ERROR_THREAD_NOT_ALIVE;
+  }
   uint32_t debug_bits = 0;
   if (is_thread_fully_suspended(java_thread, true, &debug_bits)) {
     err = get_frame_count(state, count_ptr);
@@ -1329,6 +1335,12 @@
   HandleMark hm(current_thread);
   uint32_t debug_bits = 0;
 
+  // retrieve or create the state
+  JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread);
+  if (state == NULL) {
+    return JVMTI_ERROR_THREAD_NOT_ALIVE;
+  }
+
   // Check if java_thread is fully suspended
   if (!is_thread_fully_suspended(java_thread, true /* wait for suspend completion */, &debug_bits)) {
     return JVMTI_ERROR_THREAD_NOT_SUSPENDED;
@@ -1399,9 +1411,6 @@
     // It's fine to update the thread state here because no JVMTI events
     // shall be posted for this PopFrame.
 
-    // retreive or create the state
-    JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread);
-
     state->update_for_pop_top_frame();
     java_thread->set_popframe_condition(JavaThread::popframe_pending_bit);
     // Set pending step flag for this popframe and it is cleared when next
@@ -1445,6 +1454,11 @@
   ResourceMark rm;
   uint32_t debug_bits = 0;
 
+  JvmtiThreadState *state = JvmtiThreadState::state_for(java_thread);
+  if (state == NULL) {
+    return JVMTI_ERROR_THREAD_NOT_ALIVE;
+  }
+
   if (!JvmtiEnv::is_thread_fully_suspended(java_thread, true, &debug_bits)) {
       return JVMTI_ERROR_THREAD_NOT_SUSPENDED;
   }
@@ -1464,7 +1478,6 @@
 
   assert(vf->frame_pointer() != NULL, "frame pointer mustn't be NULL");
 
-  JvmtiThreadState *state = JvmtiThreadState::state_for(java_thread);
   int frame_number = state->count_frames() - depth;
   state->env_thread_state(this)->set_frame_pop(frame_number);
 
--- a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -94,6 +94,35 @@
 }
 
 
+bool
+JvmtiEnvBase::is_valid() {
+  jint value = 0;
+
+  // This object might not be a JvmtiEnvBase so we can't assume
+  // the _magic field is properly aligned. Get the value in a safe
+  // way and then check against JVMTI_MAGIC.
+
+  switch (sizeof(_magic)) {
+  case 2:
+    value = Bytes::get_native_u2((address)&_magic);
+    break;
+
+  case 4:
+    value = Bytes::get_native_u4((address)&_magic);
+    break;
+
+  case 8:
+    value = Bytes::get_native_u8((address)&_magic);
+    break;
+
+  default:
+    guarantee(false, "_magic field is an unexpected size");
+  }
+
+  return value == JVMTI_MAGIC;
+}
+
+
 JvmtiEnvBase::JvmtiEnvBase() : _env_event_enable() {
   _env_local_storage = NULL;
   _tag_map = NULL;
@@ -1322,6 +1351,12 @@
   HandleMark   hm(current_thread);
   uint32_t debug_bits = 0;
 
+  // retrieve or create the state
+  JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread);
+  if (state == NULL) {
+    return JVMTI_ERROR_THREAD_NOT_ALIVE;
+  }
+
   // Check if java_thread is fully suspended
   if (!is_thread_fully_suspended(java_thread,
                                  true /* wait for suspend completion */,
@@ -1329,9 +1364,6 @@
     return JVMTI_ERROR_THREAD_NOT_SUSPENDED;
   }
 
-  // retreive or create the state
-  JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread);
-
   // Check to see if a ForceEarlyReturn was already in progress
   if (state->is_earlyret_pending()) {
     // Probably possible for JVMTI clients to trigger this, but the
--- a/hotspot/src/share/vm/prims/jvmtiEnvBase.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -120,7 +120,7 @@
 
  public:
 
-  bool is_valid()                                  { return _magic == JVMTI_MAGIC; }
+  bool is_valid();
 
   bool is_retransformable()                        { return _is_retransformable; }
 
--- a/hotspot/src/share/vm/prims/jvmtiEventController.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiEventController.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -478,6 +478,11 @@
 // set external state accordingly.  Only thread-filtered events are included.
 jlong
 JvmtiEventControllerPrivate::recompute_thread_enabled(JvmtiThreadState *state) {
+  if (state == NULL) {
+    // associated JavaThread is exiting
+    return (jlong)0;
+  }
+
   jlong was_any_env_enabled = state->thread_event_enable()->_event_enabled.get_bits();
   jlong any_env_enabled = 0;
 
@@ -553,6 +558,7 @@
     {
       MutexLocker mu(Threads_lock);   //hold the Threads_lock for the iteration
       for (JavaThread *tp = Threads::first(); tp != NULL; tp = tp->next()) {
+        // state_for_while_locked() makes tp->is_exiting() check
         JvmtiThreadState::state_for_while_locked(tp);  // create the thread state if missing
       }
     }// release Threads_lock
--- a/hotspot/src/share/vm/prims/jvmtiExport.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiExport.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1872,6 +1872,9 @@
 {
   // register the stub with the current dynamic code event collector
   JvmtiThreadState* state = JvmtiThreadState::state_for(JavaThread::current());
+  // state can only be NULL if the current thread is exiting which
+  // should not happen since we're trying to post an event
+  guarantee(state != NULL, "attempt to register stub via an exiting thread");
   JvmtiDynamicCodeEventCollector* collector = state->get_dynamic_code_event_collector();
   guarantee(collector != NULL, "attempt to register stub without event collector");
   collector->register_stub(name, code_begin, code_end);
@@ -2253,6 +2256,9 @@
 void JvmtiEventCollector::setup_jvmti_thread_state() {
   // set this event collector to be the current one.
   JvmtiThreadState* state = JvmtiThreadState::state_for(JavaThread::current());
+  // state can only be NULL if the current thread is exiting which
+  // should not happen since we're trying to configure for event collection
+  guarantee(state != NULL, "exiting thread called setup_jvmti_thread_state");
   if (is_vm_object_alloc_event()) {
     _prev = state->get_vm_object_alloc_event_collector();
     state->set_vm_object_alloc_event_collector((JvmtiVMObjectAllocEventCollector *)this);
--- a/hotspot/src/share/vm/prims/jvmtiImpl.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiImpl.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -238,6 +238,35 @@
 }
 
 
+bool
+JvmtiRawMonitor::is_valid() {
+  int value = 0;
+
+  // This object might not be a JvmtiRawMonitor so we can't assume
+  // the _magic field is properly aligned. Get the value in a safe
+  // way and then check against JVMTI_RM_MAGIC.
+
+  switch (sizeof(_magic)) {
+  case 2:
+    value = Bytes::get_native_u2((address)&_magic);
+    break;
+
+  case 4:
+    value = Bytes::get_native_u4((address)&_magic);
+    break;
+
+  case 8:
+    value = Bytes::get_native_u8((address)&_magic);
+    break;
+
+  default:
+    guarantee(false, "_magic field is an unexpected size");
+  }
+
+  return value == JVMTI_RM_MAGIC;
+}
+
+
 //
 // class JvmtiBreakpoint
 //
--- a/hotspot/src/share/vm/prims/jvmtiImpl.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiImpl.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -349,7 +349,7 @@
   ~JvmtiRawMonitor();
   int            magic()   { return _magic;  }
   const char *get_name()   { return _name; }
-  bool        is_valid()   { return _magic == JVMTI_RM_MAGIC;  }
+  bool        is_valid();
 };
 
 // Onload pending raw monitors
--- a/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -831,6 +831,9 @@
   ResourceMark rm(THREAD);
 
   JvmtiThreadState *state = JvmtiThreadState::state_for(JavaThread::current());
+  // state can only be NULL if the current thread is exiting which
+  // should not happen since we're trying to do a RedefineClasses
+  guarantee(state != NULL, "exiting thread calling load_new_class_versions");
   for (int i = 0; i < _class_count; i++) {
     oop mirror = JNIHandles::resolve_non_null(_class_defs[i].klass);
     // classes for primitives cannot be redefined
@@ -1349,39 +1352,39 @@
 
   // rewrite constant pool references in the methods:
   if (!rewrite_cp_refs_in_methods(scratch_class, THREAD)) {
-    // propogate failure back to caller
+    // propagate failure back to caller
     return false;
   }
 
   // rewrite constant pool references in the class_annotations:
   if (!rewrite_cp_refs_in_class_annotations(scratch_class, THREAD)) {
-    // propogate failure back to caller
+    // propagate failure back to caller
     return false;
   }
 
   // rewrite constant pool references in the fields_annotations:
   if (!rewrite_cp_refs_in_fields_annotations(scratch_class, THREAD)) {
-    // propogate failure back to caller
+    // propagate failure back to caller
     return false;
   }
 
   // rewrite constant pool references in the methods_annotations:
   if (!rewrite_cp_refs_in_methods_annotations(scratch_class, THREAD)) {
-    // propogate failure back to caller
+    // propagate failure back to caller
     return false;
   }
 
   // rewrite constant pool references in the methods_parameter_annotations:
   if (!rewrite_cp_refs_in_methods_parameter_annotations(scratch_class,
          THREAD)) {
-    // propogate failure back to caller
+    // propagate failure back to caller
     return false;
   }
 
   // rewrite constant pool references in the methods_default_annotations:
   if (!rewrite_cp_refs_in_methods_default_annotations(scratch_class,
          THREAD)) {
-    // propogate failure back to caller
+    // propagate failure back to caller
     return false;
   }
 
@@ -1600,7 +1603,7 @@
            byte_i_ref, THREAD)) {
       RC_TRACE_WITH_THREAD(0x02000000, THREAD,
         ("bad annotation_struct at %d", calc_num_annotations));
-      // propogate failure back to caller
+      // propagate failure back to caller
       return false;
     }
   }
@@ -1666,7 +1669,7 @@
            byte_i_ref, THREAD)) {
       RC_TRACE_WITH_THREAD(0x02000000, THREAD,
         ("bad element_value at %d", calc_num_element_value_pairs));
-      // propogate failure back to caller
+      // propagate failure back to caller
       return false;
     }
   } // end for each component
@@ -1815,7 +1818,7 @@
       // field. This is a nested annotation.
       if (!rewrite_cp_refs_in_annotation_struct(annotations_typeArray,
              byte_i_ref, THREAD)) {
-        // propogate failure back to caller
+        // propagate failure back to caller
         return false;
       }
       break;
@@ -1842,7 +1845,7 @@
                annotations_typeArray, byte_i_ref, THREAD)) {
           RC_TRACE_WITH_THREAD(0x02000000, THREAD,
             ("bad nested element_value at %d", calc_num_values));
-          // propogate failure back to caller
+          // propagate failure back to caller
           return false;
         }
       }
@@ -1886,7 +1889,7 @@
            THREAD)) {
       RC_TRACE_WITH_THREAD(0x02000000, THREAD,
         ("bad field_annotations at %d", i));
-      // propogate failure back to caller
+      // propagate failure back to caller
       return false;
     }
   }
@@ -1923,7 +1926,7 @@
            THREAD)) {
       RC_TRACE_WITH_THREAD(0x02000000, THREAD,
         ("bad method_annotations at %d", i));
-      // propogate failure back to caller
+      // propagate failure back to caller
       return false;
     }
   }
@@ -1991,7 +1994,7 @@
              method_parameter_annotations, byte_i, THREAD)) {
         RC_TRACE_WITH_THREAD(0x02000000, THREAD,
           ("bad method_parameter_annotations at %d", calc_num_parameters));
-        // propogate failure back to caller
+        // propagate failure back to caller
         return false;
       }
     }
@@ -2041,7 +2044,7 @@
            method_default_annotations, byte_i, THREAD)) {
       RC_TRACE_WITH_THREAD(0x02000000, THREAD,
         ("bad default element_value at %d", i));
-      // propogate failure back to caller
+      // propagate failure back to caller
       return false;
     }
   }
--- a/hotspot/src/share/vm/prims/jvmtiRedefineClassesTrace.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiRedefineClassesTrace.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -49,8 +49,8 @@
 //    0x00000400 |       1024 - previous class weak reference mgmt during
 //                              add previous ops (GC)
 //    0x00000800 |       2048 - previous class breakpoint mgmt
-//    0x00001000 |       4096 - unused
-//    0x00002000 |       8192 - unused
+//    0x00001000 |       4096 - detect calls to obsolete methods
+//    0x00002000 |       8192 - fail a guarantee() in addition to detection
 //    0x00004000 |      16384 - unused
 //    0x00008000 |      32768 - old/new method matching/add/delete
 //    0x00010000 |      65536 - impl details: CP size info
--- a/hotspot/src/share/vm/prims/jvmtiThreadState.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiThreadState.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -314,17 +314,24 @@
   void update_for_pop_top_frame();
 
   // already holding JvmtiThreadState_lock - retrieve or create JvmtiThreadState
+  // Can return NULL if JavaThread is exiting.
   inline static JvmtiThreadState *state_for_while_locked(JavaThread *thread) {
     assert(JvmtiThreadState_lock->is_locked(), "sanity check");
 
     JvmtiThreadState *state = thread->jvmti_thread_state();
     if (state == NULL) {
+      if (thread->is_exiting()) {
+        // don't add a JvmtiThreadState to a thread that is exiting
+        return NULL;
+      }
+
       state = new JvmtiThreadState(thread);
     }
     return state;
   }
 
   // retrieve or create JvmtiThreadState
+  // Can return NULL if JavaThread is exiting.
   inline static JvmtiThreadState *state_for(JavaThread *thread) {
     JvmtiThreadState *state = thread->jvmti_thread_state();
     if (state == NULL) {
--- a/hotspot/src/share/vm/runtime/extendedPC.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/extendedPC.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -23,7 +23,7 @@
  */
 
 // An ExtendedPC contains the _pc from a signal handler in a platform
-// independant way.
+// independent way.
 
 class ExtendedPC VALUE_OBJ_CLASS_SPEC {
  private:
--- a/hotspot/src/share/vm/runtime/fprofiler.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/fprofiler.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -988,7 +988,7 @@
 
 
 void ThreadProfiler::record_tick_for_running_frame(JavaThread* thread, frame fr) {
-  // The tick happend in real code -> non VM code
+  // The tick happened in real code -> non VM code
   if (fr.is_interpreted_frame()) {
     interval_data_ref()->inc_interpreted();
     record_interpreted_tick(thread, fr, tp_code, FlatProfiler::bytecode_ticks);
@@ -1019,7 +1019,7 @@
 }
 
 void ThreadProfiler::record_tick_for_calling_frame(JavaThread* thread, frame fr) {
-  // The tick happend in VM code
+  // The tick happened in VM code
   interval_data_ref()->inc_native();
   if (fr.is_interpreted_frame()) {
     record_interpreted_tick(thread, fr, tp_native, FlatProfiler::bytecode_ticks_stub);
--- a/hotspot/src/share/vm/runtime/frame.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/frame.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -930,7 +930,7 @@
         // => process callee's arguments
         //
         // Note: The expression stack can be empty if an exception
-        //       occured during method resolution/execution. In all
+        //       occurred during method resolution/execution. In all
         //       cases we empty the expression stack completely be-
         //       fore handling the exception (the exception handling
         //       code in the interpreter calls a blocking runtime
--- a/hotspot/src/share/vm/runtime/frame.inline.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/frame.inline.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -22,7 +22,7 @@
  *
  */
 
-// This file holds platform-independant bodies of inline functions for frames.
+// This file holds platform-independent bodies of inline functions for frames.
 
 // Note: The bcx usually contains the bcp; however during GC it contains the bci
 //       (changed by gc_prologue() and gc_epilogue()) to be methodOop position
--- a/hotspot/src/share/vm/runtime/globals.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -662,6 +662,12 @@
   product(ccstrlist, OnOutOfMemoryError, "",                                \
           "Run user-defined commands on first java.lang.OutOfMemoryError")  \
                                                                             \
+  manageable(bool, HeapDumpBeforeFullGC, false,                             \
+          "Dump heap to file before any major stop-world GC")               \
+                                                                            \
+  manageable(bool, HeapDumpAfterFullGC, false,                              \
+          "Dump heap to file after any major stop-world GC")                \
+                                                                            \
   manageable(bool, HeapDumpOnOutOfMemoryError, false,                       \
           "Dump heap to file when java.lang.OutOfMemoryError is thrown")    \
                                                                             \
@@ -1971,6 +1977,12 @@
   product(bool, PrintHeapAtSIGBREAK, true,                                  \
           "Print heap layout in response to SIGBREAK")                      \
                                                                             \
+  manageable(bool, PrintClassHistogramBeforeFullGC, false,                  \
+          "Print a class histogram before any major stop-world GC")         \
+                                                                            \
+  manageable(bool, PrintClassHistogramAfterFullGC, false,                   \
+          "Print a class histogram after any major stop-world GC")          \
+                                                                            \
   manageable(bool, PrintClassHistogram, false,                              \
           "Print a histogram of class instances")                           \
                                                                             \
--- a/hotspot/src/share/vm/runtime/mutex.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/mutex.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -82,7 +82,7 @@
 // *in that order*.  If their implementations change such that these
 // assumptions are violated, a whole lot of code will break.
 
-// The default length of monitor name is choosen to be 64 to avoid false sharing.
+// The default length of monitor name is chosen to be 64 to avoid false sharing.
 static const int MONITOR_NAME_LEN = 64;
 
 class Monitor : public CHeapObj {
--- a/hotspot/src/share/vm/runtime/orderAccess.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -31,7 +31,7 @@
 // at runtime.
 //
 // In the following, the terms 'previous', 'subsequent', 'before',
-// 'after', 'preceeding' and 'succeeding' refer to program order.  The
+// 'after', 'preceding' and 'succeeding' refer to program order.  The
 // terms 'down' and 'below' refer to forward load or store motion
 // relative to program order, while 'up' and 'above' refer to backward
 // motion.
--- a/hotspot/src/share/vm/runtime/os.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/os.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -207,7 +207,8 @@
         VMThread::execute(&op1);
         Universe::print_heap_at_SIGBREAK();
         if (PrintClassHistogram) {
-          VM_GC_HeapInspection op1(gclog_or_tty, true /* force full GC before heap inspection */);
+          VM_GC_HeapInspection op1(gclog_or_tty, true /* force full GC before heap inspection */,
+                                   true /* need_prologue */);
           VMThread::execute(&op1);
         }
         if (JvmtiExport::should_post_data_dump()) {
@@ -943,7 +944,7 @@
   assert(StackRedPages > 0 && StackYellowPages > 0,"Sanity check");
   address sp = current_stack_pointer();
   // Check if we have StackShadowPages above the yellow zone.  This parameter
-  // is dependant on the depth of the maximum VM call stack possible from
+  // is dependent on the depth of the maximum VM call stack possible from
   // the handler for stack overflow.  'instanceof' in the stack overflow
   // handler or a println uses at least 8k stack of VM and native code
   // respectively.
--- a/hotspot/src/share/vm/runtime/safepoint.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/safepoint.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -369,7 +369,7 @@
 
     // Start suspended threads
     for(JavaThread *current = Threads::first(); current; current = current->next()) {
-      // A problem occuring on Solaris is when attempting to restart threads
+      // A problem occurring on Solaris is when attempting to restart threads
       // the first #cpus - 1 go well, but then the VMThread is preempted when we get
       // to the next one (since it has been running the longest).  We then have
       // to wait for a cpu to become available before we can continue restarting
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -377,6 +377,32 @@
   throw_and_post_jvmti_exception(thread, h_exception);
 }
 
+// The interpreter code to call this tracing function is only
+// called/generated when TraceRedefineClasses has the right bits
+// set. Since obsolete methods are never compiled, we don't have
+// to modify the compilers to generate calls to this function.
+//
+JRT_LEAF(int, SharedRuntime::rc_trace_method_entry(
+    JavaThread* thread, methodOopDesc* method))
+  assert(RC_TRACE_IN_RANGE(0x00001000, 0x00002000), "wrong call");
+
+  if (method->is_obsolete()) {
+    // We are calling an obsolete method, but this is not necessarily
+    // an error. Our method could have been redefined just after we
+    // fetched the methodOop from the constant pool.
+
+    // RC_TRACE macro has an embedded ResourceMark
+    RC_TRACE_WITH_THREAD(0x00001000, thread,
+                         ("calling obsolete method '%s'",
+                          method->name_and_sig_as_C_string()));
+    if (RC_TRACE_ENABLED(0x00002000)) {
+      // this option is provided to debug calls to obsolete methods
+      guarantee(false, "faulting at call to an obsolete method.");
+    }
+  }
+  return 0;
+JRT_END
+
 // ret_pc points into caller; we are returning caller's exception handler
 // for given exception
 address SharedRuntime::compute_compiled_exc_handler(nmethod* nm, address ret_pc, Handle& exception,
--- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -166,6 +166,9 @@
   static void throw_and_post_jvmti_exception(JavaThread *thread, Handle h_exception);
   static void throw_and_post_jvmti_exception(JavaThread *thread, symbolOop name, const char *message = NULL);
 
+  // RedefineClasses() tracing support for obsolete method entry
+  static int rc_trace_method_entry(JavaThread* thread, methodOopDesc* m);
+
   // To be used as the entry point for unresolved native methods.
   static address native_method_throw_unsatisfied_link_error_entry();
 
--- a/hotspot/src/share/vm/runtime/signature.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/signature.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -266,7 +266,7 @@
 class NativeSignatureIterator: public SignatureIterator {
  private:
   methodHandle _method;
-// We need seperate JNI and Java offset values because in 64 bit mode,
+// We need separate JNI and Java offset values because in 64 bit mode,
 // the argument offsets are not in sync with the Java stack.
 // For example a long takes up 1 "C" stack entry but 2 Java stack entries.
   int          _offset;                // The java stack offset
--- a/hotspot/src/share/vm/runtime/thread.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/thread.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -1345,6 +1345,13 @@
  public:
   // Thread local information maintained by JVMTI.
   void set_jvmti_thread_state(JvmtiThreadState *value)                           { _jvmti_thread_state = value; }
+  // A JvmtiThreadState is lazily allocated. This jvmti_thread_state()
+  // getter is used to get this JavaThread's JvmtiThreadState if it has
+  // one which means NULL can be returned. JvmtiThreadState::state_for()
+  // is used to get the specified JavaThread's JvmtiThreadState if it has
+  // one or it allocates a new JvmtiThreadState for the JavaThread and
+  // returns it. JvmtiThreadState::state_for() will return NULL only if
+  // the specified JavaThread is exiting.
   JvmtiThreadState *jvmti_thread_state() const                                   { return _jvmti_thread_state; }
   static ByteSize jvmti_thread_state_offset()                                    { return byte_offset_of(JavaThread, _jvmti_thread_state); }
   void set_jvmti_get_loaded_classes_closure(JvmtiGetLoadedClassesClosure* value) { _jvmti_get_loaded_classes_closure = value; }
--- a/hotspot/src/share/vm/runtime/threadCritical.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/runtime/threadCritical.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -29,7 +29,7 @@
 //
 // Due to race conditions during vm exit, some of the os level
 // synchronization primitives may not be deallocated at exit. It
-// is a good plan to implement the platform dependant sections of
+// is a good plan to implement the platform dependent sections of
 // code with resources that are recoverable during process
 // cleanup by the os. Calling the initialize method before use
 // is also problematic, it is best to use preinitialized primitives
--- a/hotspot/src/share/vm/services/attachListener.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/services/attachListener.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -194,7 +194,7 @@
     }
     live_objects_only = strcmp(arg0, "-live") == 0;
   }
-  VM_GC_HeapInspection heapop(out, live_objects_only /* request gc */);
+  VM_GC_HeapInspection heapop(out, live_objects_only /* request full gc */, true /* need_prologue */);
   VMThread::execute(&heapop);
   return JNI_OK;
 }
--- a/hotspot/src/share/vm/services/heapDumper.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/services/heapDumper.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -347,7 +347,6 @@
   INITIAL_CLASS_COUNT = 200
 };
 
-
 // Supports I/O operations on a dump file
 
 class DumpWriter : public StackObj {
@@ -1303,7 +1302,9 @@
 // The VM operation that performs the heap dump
 class VM_HeapDumper : public VM_GC_Operation {
  private:
-  DumpWriter* _writer;
+  static VM_HeapDumper* _global_dumper;
+  static DumpWriter*    _global_writer;
+  DumpWriter*           _local_writer;
   bool _gc_before_heap_dump;
   bool _is_segmented_dump;
   jlong _dump_start;
@@ -1311,8 +1312,20 @@
   ThreadStackTrace** _stack_traces;
   int _num_threads;
 
-  // accessors
-  DumpWriter* writer() const                    { return _writer; }
+  // accessors and setters
+  static VM_HeapDumper* dumper()         {  assert(_global_dumper != NULL, "Error"); return _global_dumper; }
+  static DumpWriter* writer()            {  assert(_global_writer != NULL, "Error"); return _global_writer; }
+  void set_global_dumper() {
+    assert(_global_dumper == NULL, "Error");
+    _global_dumper = this;
+  }
+  void set_global_writer() {
+    assert(_global_writer == NULL, "Error");
+    _global_writer = _local_writer;
+  }
+  void clear_global_dumper() { _global_dumper = NULL; }
+  void clear_global_writer() { _global_writer = NULL; }
+
   bool is_segmented_dump() const                { return _is_segmented_dump; }
   void set_segmented_dump()                     { _is_segmented_dump = true; }
   jlong dump_start() const                      { return _dump_start; }
@@ -1357,7 +1370,7 @@
     VM_GC_Operation(0 /* total collections,      dummy, ignored */,
                     0 /* total full collections, dummy, ignored */,
                     gc_before_heap_dump) {
-    _writer = writer;
+    _local_writer = writer;
     _gc_before_heap_dump = gc_before_heap_dump;
     _is_segmented_dump = false;
     _dump_start = (jlong)-1;
@@ -1381,6 +1394,9 @@
   void doit();
 };
 
+VM_HeapDumper* VM_HeapDumper::_global_dumper = NULL;
+DumpWriter*    VM_HeapDumper::_global_writer = NULL;
+
 bool VM_HeapDumper::skip_operation() const {
   return false;
 }
@@ -1479,31 +1495,28 @@
 void VM_HeapDumper::do_load_class(klassOop k) {
   static u4 class_serial_num = 0;
 
-  VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation());
-  DumpWriter* writer = dumper->writer();
-
   // len of HPROF_LOAD_CLASS record
   u4 remaining = 2*oopSize + 2*sizeof(u4);
 
   // write a HPROF_LOAD_CLASS for the class and each array class
   do {
-    DumperSupport::write_header(writer, HPROF_LOAD_CLASS, remaining);
+    DumperSupport::write_header(writer(), HPROF_LOAD_CLASS, remaining);
 
     // class serial number is just a number
-    writer->write_u4(++class_serial_num);
+    writer()->write_u4(++class_serial_num);
 
     // class ID
     Klass* klass = Klass::cast(k);
-    writer->write_classID(klass);
+    writer()->write_classID(klass);
 
     // add the klassOop and class serial number pair
-    dumper->add_class_serial_number(klass, class_serial_num);
+    dumper()->add_class_serial_number(klass, class_serial_num);
 
-    writer->write_u4(STACK_TRACE_ID);
+    writer()->write_u4(STACK_TRACE_ID);
 
     // class name ID
     symbolOop name = klass->name();
-    writer->write_objectID(name);
+    writer()->write_objectID(name);
 
     // write a LOAD_CLASS record for the array type (if it exists)
     k = klass->array_klass_or_null();
@@ -1512,17 +1525,13 @@
 
 // writes a HPROF_GC_CLASS_DUMP record for the given class
 void VM_HeapDumper::do_class_dump(klassOop k) {
-  VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation());
-  DumpWriter* writer = dumper->writer();
-  DumperSupport::dump_class_and_array_classes(writer, k);
+  DumperSupport::dump_class_and_array_classes(writer(), k);
 }
 
 // writes a HPROF_GC_CLASS_DUMP records for a given basic type
 // array (and each multi-dimensional array too)
 void VM_HeapDumper::do_basic_type_array_class_dump(klassOop k) {
-  VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation());
-  DumpWriter* writer = dumper->writer();
-  DumperSupport::dump_basic_type_array_class(writer, k);
+  DumperSupport::dump_basic_type_array_class(writer(), k);
 }
 
 // Walk the stack of the given thread.
@@ -1658,6 +1667,11 @@
     ch->ensure_parsability(false);
   }
 
+  // At this point we should be the only dumper active, so
+  // the following should be safe.
+  set_global_dumper();
+  set_global_writer();
+
   // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1
   size_t used = ch->used();
   const char* header;
@@ -1667,6 +1681,7 @@
   } else {
     header = "JAVA PROFILE 1.0.1";
   }
+
   // header is few bytes long - no chance to overflow int
   writer()->write_raw((void*)header, (int)strlen(header));
   writer()->write_u1(0); // terminator
@@ -1723,6 +1738,10 @@
   // fixes up the length of the dump record. In the case of a segmented
   // heap then the HPROF_HEAP_DUMP_END record is also written.
   end_of_dump();
+
+  // Now we clear the global variables, so that a future dumper might run.
+  clear_global_dumper();
+  clear_global_writer();
 }
 
 void VM_HeapDumper::dump_stack_traces() {
@@ -1790,7 +1809,12 @@
 
   // generate the dump
   VM_HeapDumper dumper(&writer, _gc_before_heap_dump);
-  VMThread::execute(&dumper);
+  if (Thread::current()->is_VM_thread()) {
+    assert(SafepointSynchronize::is_at_safepoint(), "Expected to be called at a safepoint");
+    dumper.doit();
+  } else {
+    VMThread::execute(&dumper);
+  }
 
   // close dump file and record any error that the writer may have encountered
   writer.close();
@@ -1845,49 +1869,68 @@
   }
 }
 
-
-// Called by error reporting
+// Called by error reporting by a single Java thread outside of a JVM safepoint,
+// or by heap dumping by the VM thread during a (GC) safepoint. Thus, these various
+// callers are strictly serialized and guaranteed not to interfere below. For more
+// general use, however, this method will need modification to prevent
+// inteference when updating the static variables base_path and dump_file_seq below.
 void HeapDumper::dump_heap() {
-  static char path[JVM_MAXPATHLEN];
+  static char base_path[JVM_MAXPATHLEN] = {'\0'};
+  static uint dump_file_seq = 0;
+  char   my_path[JVM_MAXPATHLEN] = {'\0'};
 
   // The dump file defaults to java_pid<pid>.hprof in the current working
   // directory. HeapDumpPath=<file> can be used to specify an alternative
   // dump file name or a directory where dump file is created.
-  bool use_default_filename = true;
-  if (HeapDumpPath == NULL || HeapDumpPath[0] == '\0') {
-    path[0] = '\0'; // HeapDumpPath=<file> not specified
-  } else {
-    assert(strlen(HeapDumpPath) < sizeof(path), "HeapDumpPath too long");
-    strcpy(path, HeapDumpPath);
-    // check if the path is a directory (must exist)
-    DIR* dir = os::opendir(path);
-    if (dir == NULL) {
-      use_default_filename = false;
+  if (dump_file_seq == 0) { // first time in, we initialize base_path
+    bool use_default_filename = true;
+    if (HeapDumpPath == NULL || HeapDumpPath[0] == '\0') {
+      // HeapDumpPath=<file> not specified
     } else {
-      // HeapDumpPath specified a directory. We append a file separator
-      // (if needed).
-      os::closedir(dir);
-      size_t fs_len = strlen(os::file_separator());
-      if (strlen(path) >= fs_len) {
-        char* end = path;
-        end += (strlen(path) - fs_len);
-        if (strcmp(end, os::file_separator()) != 0) {
-          assert(strlen(path) + strlen(os::file_separator()) < sizeof(path),
-            "HeapDumpPath too long");
-          strcat(path, os::file_separator());
+      assert(strlen(HeapDumpPath) < sizeof(base_path), "HeapDumpPath too long");
+      strcpy(base_path, HeapDumpPath);
+      // check if the path is a directory (must exist)
+      DIR* dir = os::opendir(base_path);
+      if (dir == NULL) {
+        use_default_filename = false;
+      } else {
+        // HeapDumpPath specified a directory. We append a file separator
+        // (if needed).
+        os::closedir(dir);
+        size_t fs_len = strlen(os::file_separator());
+        if (strlen(base_path) >= fs_len) {
+          char* end = base_path;
+          end += (strlen(base_path) - fs_len);
+          if (strcmp(end, os::file_separator()) != 0) {
+            assert(strlen(base_path) + strlen(os::file_separator()) < sizeof(base_path),
+              "HeapDumpPath too long");
+            strcat(base_path, os::file_separator());
+          }
         }
       }
     }
+    // If HeapDumpPath wasn't a file name then we append the default name
+    if (use_default_filename) {
+      char fn[32];
+      sprintf(fn, "java_pid%d", os::current_process_id());
+      assert(strlen(base_path) + strlen(fn) < sizeof(base_path), "HeapDumpPath too long");
+      strcat(base_path, fn);
+    }
+    assert(strlen(base_path) < sizeof(my_path), "Buffer too small");
+    strcpy(my_path, base_path);
+  } else {
+    // Append a sequence number id for dumps following the first
+    char fn[33];
+    sprintf(fn, ".%d", dump_file_seq);
+    assert(strlen(base_path) + strlen(fn) < sizeof(my_path), "HeapDumpPath too long");
+    strcpy(my_path, base_path);
+    strcat(my_path, fn);
   }
-  // If HeapDumpPath wasn't a file name then we append the default name
-  if (use_default_filename) {
-    char fn[32];
-    sprintf(fn, "java_pid%d.hprof", os::current_process_id());
-    assert(strlen(path) + strlen(fn) < sizeof(path), "HeapDumpPath too long");
-    strcat(path, fn);
-  }
+  dump_file_seq++;   // increment seq number for next time we dump
+  assert(strlen(".hprof") + strlen(my_path) < sizeof(my_path), "HeapDumpPath too long");
+  strcat(my_path, ".hprof");
 
   HeapDumper dumper(false /* no GC before heap dump */,
                     true  /* send to tty */);
-  dumper.dump(path);
+  dumper.dump(my_path);
 }
--- a/hotspot/src/share/vm/services/heapDumper.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/services/heapDumper.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -53,7 +53,7 @@
 
  public:
   HeapDumper(bool gc_before_heap_dump) :
-    _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(false)  { }
+    _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(false) { }
   HeapDumper(bool gc_before_heap_dump, bool print_to_tty) :
     _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(print_to_tty) { }
 
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -881,7 +881,7 @@
     i++; p *= 2;
   }
   // p = 2^(i+1) && x < p (i.e., 2^i <= x < 2^(i+1))
-  // (if p = 0 then overflow occured and i = 31)
+  // (if p = 0 then overflow occurred and i = 31)
   return i;
 }
 
@@ -895,7 +895,7 @@
     i++; p *= 2;
   }
   // p = 2^(i+1) && x < p (i.e., 2^i <= x < 2^(i+1))
-  // (if p = 0 then overflow occured and i = 63)
+  // (if p = 0 then overflow occurred and i = 63)
   return i;
 }
 
--- a/hotspot/src/share/vm/utilities/growableArray.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/utilities/growableArray.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -43,11 +43,13 @@
 #endif
 
 void* GenericGrowableArray::raw_allocate(int elementSize) {
+  assert(_max >= 0, "integer overflow");
+  size_t byte_size = elementSize * (size_t) _max;
   if (on_stack()) {
-    return (void*)resource_allocate_bytes(elementSize * _max);
+    return (void*)resource_allocate_bytes(byte_size);
   } else if (on_C_heap()) {
-    return (void*)AllocateHeap(elementSize * _max, "GrET in " __FILE__);
+    return (void*)AllocateHeap(byte_size, "GrET in " __FILE__);
   } else {
-    return _arena->Amalloc(elementSize * _max);
+    return _arena->Amalloc(byte_size);
   }
 }
--- a/hotspot/src/share/vm/utilities/vmError.cpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/utilities/vmError.cpp	Thu Mar 12 18:16:36 2009 -0700
@@ -306,7 +306,7 @@
 
          strncpy(buf, file, buflen);
          if (len + 10 < buflen) {
-           sprintf(buf + len, ":" SIZE_FORMAT, _lineno);
+           sprintf(buf + len, ":%d", _lineno);
          }
          st->print(" (%s)", buf);
        } else {
@@ -420,7 +420,7 @@
 
        if (fr.sp()) {
          st->print(",  sp=" PTR_FORMAT, fr.sp());
-         st->print(",  free space=%dk",
+         st->print(",  free space=%" INTPTR_FORMAT "k",
                      ((intptr_t)fr.sp() - (intptr_t)stack_bottom) >> 10);
        }
 
--- a/hotspot/src/share/vm/utilities/vmError.hpp	Mon Mar 09 13:34:00 2009 -0700
+++ b/hotspot/src/share/vm/utilities/vmError.hpp	Thu Mar 12 18:16:36 2009 -0700
@@ -50,7 +50,7 @@
 
   // additional info for VM internal errors
   const char * _filename;
-  size_t       _lineno;
+  int          _lineno;
 
   // used by fatal error handler
   int          _current_step;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6797305/Test6797305.java	Thu Mar 12 18:16:36 2009 -0700
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6797305
+ * @summary Add LoadUB and LoadUI opcode class
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6797305.loadB,Test6797305.loadB2L,Test6797305.loadUB,Test6797305.loadUBmask,Test6797305.loadUB2L,Test6797305.loadS,Test6797305.loadS2L,Test6797305.loadUS,Test6797305.loadUSmask,Test6797305.loadUS2L,Test6797305.loadI,Test6797305.loadI2L,Test6797305.loadUI2L,Test6797305.loadL Test6797305
+ */
+
+public class Test6797305 {
+    static final byte[]  ba = new byte[]  { -1 };
+    static final short[] sa = new short[] { -1 };
+    static final int[]   ia = new int[]   { -1 };
+    static final long[]  la = new long[]  { -1 };
+
+    public static void main(String[] args)
+    {
+        long b = loadB(ba);
+        if (b != -1)
+            throw new InternalError("loadB failed: " + b + " != " + -1);
+
+        long b2l = loadB2L(ba);
+        if (b2l != -1L)
+            throw new InternalError("loadB2L failed: " + b2l + " != " + -1L);
+
+        int ub = loadUB(ba);
+        if (ub != 0xFF)
+            throw new InternalError("loadUB failed: " + ub + " != " + 0xFF);
+
+        int ubmask = loadUBmask(ba);
+        if (ubmask != 0xFE)
+            throw new InternalError("loadUBmask failed: " + ubmask + " != " + 0xFE);
+
+        long ub2l = loadUB2L(ba);
+        if (ub2l != 0xFFL)
+            throw new InternalError("loadUB2L failed: " + ub2l + " != " + 0xFFL);
+
+        int s = loadS(sa);
+        if (s != -1)
+            throw new InternalError("loadS failed: " + s + " != " + -1);
+
+        long s2l = loadS2L(sa);
+        if (s2l != -1L)
+            throw new InternalError("loadS2L failed: " + s2l + " != " + -1L);
+
+        int us = loadUS(sa);
+        if (us != 0xFFFF)
+            throw new InternalError("loadUS failed: " + us + " != " + 0xFFFF);
+
+        int usmask = loadUSmask(sa);
+        if (usmask != 0xFFFE)
+            throw new InternalError("loadUBmask failed: " + ubmask + " != " + 0xFFFE);
+
+        long us2l = loadUS2L(sa);
+        if (us2l != 0xFFFFL)
+            throw new InternalError("loadUS2L failed: " + us2l + " != " + 0xFFFFL);
+
+        int i = loadI(ia);
+        if (i != -1)
+            throw new InternalError("loadI failed: " + i + " != " + -1);
+
+        long i2l = loadI2L(ia);
+        if (i2l != -1L)
+            throw new InternalError("loadI2L failed: " + i2l + " != " + -1L);
+
+        long ui2l = loadUI2L(ia);
+        if (ui2l != 0xFFFFFFFFL)
+            throw new InternalError("loadUI2L failed: " + ui2l + " != " + 0xFFFFFFFFL);
+
+        long l = loadL(la);
+        if (l != -1L)
+            throw new InternalError("loadL failed: " + l + " != " + -1L);
+    }
+
+    static int  loadB     (byte[] ba)  { return ba[0];               }
+    static long loadB2L   (byte[] ba)  { return ba[0];               }
+    static int  loadUB    (byte[] ba)  { return ba[0] & 0xFF;        }
+    static int  loadUBmask(byte[] ba)  { return ba[0] & 0xFE;        }
+    static long loadUB2L  (byte[] ba)  { return ba[0] & 0xFF;        }
+
+    static int  loadS     (short[] sa) { return sa[0];               }
+    static long loadS2L   (short[] sa) { return sa[0];               }
+    static int  loadUS    (short[] sa) { return sa[0] & 0xFFFF;      }
+    static int  loadUSmask(short[] sa) { return sa[0] & 0xFFFE;      }
+    static long loadUS2L  (short[] sa) { return sa[0] & 0xFFFF;      }
+
+    static int  loadI     (int[] ia)   { return ia[0];               }
+    static long loadI2L   (int[] ia)   { return ia[0];               }
+    static long loadUI2L  (int[] ia)   { return ia[0] & 0xFFFFFFFFL; }
+
+    static long loadL     (long[] la)  { return la[0];               }
+}