8164888: Intrinsify fused mac operations on SPARC
authorneliasso
Tue, 27 Jun 2017 15:50:09 +0200
changeset 46597 d669fb842ae3
parent 46596 a7c9706d25a9
child 46598 41a61509db75
8164888: Intrinsify fused mac operations on SPARC Summary: Such speed, much wow Reviewed-by: kvn Contributed-by: phedlin@oracle.com
hotspot/src/cpu/sparc/vm/abstractInterpreter_sparc.cpp
hotspot/src/cpu/sparc/vm/assembler_sparc.hpp
hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp
hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp
hotspot/src/cpu/sparc/vm/sparc.ad
hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp
hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
hotspot/src/share/vm/runtime/stubRoutines.hpp
--- a/hotspot/src/cpu/sparc/vm/abstractInterpreter_sparc.cpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/abstractInterpreter_sparc.cpp	Tue Jun 27 15:50:09 2017 +0200
@@ -52,8 +52,16 @@
   return i;
 }
 
+// These should never be compiled since the interpreter will prefer the compiled
+// version to the intrinsic version.
 bool AbstractInterpreter::can_be_compiled(methodHandle m) {
-  // No special entry points that preclude compilation
+  switch (method_kind(m)) {
+    case Interpreter::java_lang_math_fmaD:
+    case Interpreter::java_lang_math_fmaF:
+      return false;
+    default:
+      break;
+  }
   return true;
 }
 
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Tue Jun 27 15:50:09 2017 +0200
@@ -628,6 +628,9 @@
   // CRC32C instruction supported only on certain processors
   static void crc32c_only() { assert(VM_Version::has_crc32c(), "This instruction only works on SPARC with CRC32C"); }
 
+  // FMAf instructions supported only on certain processors
+  static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
+
   // instruction only in VIS1
   static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
 
@@ -923,6 +926,10 @@
 
   inline void fsqrt(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
 
+  // fmaf instructions.
+
+  inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+
   // pp 165
 
   inline void flush(Register s1, Register s2);
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Tue Jun 27 15:50:09 2017 +0200
@@ -355,6 +355,11 @@
   emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w));
 }
 
+inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+  fmaf_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
+}
+
 inline void Assembler::flush(Register s1, Register s2) {
   emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
 }
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jun 27 15:50:09 2017 +0200
@@ -440,6 +440,31 @@
 }
 
 void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  switch (op->code()) {
+    case lir_idiv:
+    case lir_irem:  // Both idiv & irem are handled after the switch (below).
+      break;
+    case lir_fmaf:
+      __ fmadd(FloatRegisterImpl::S,
+               op->in_opr1()->as_float_reg(),
+               op->in_opr2()->as_float_reg(),
+               op->in_opr3()->as_float_reg(),
+               op->result_opr()->as_float_reg());
+      return;
+    case lir_fmad:
+      __ fmadd(FloatRegisterImpl::D,
+               op->in_opr1()->as_double_reg(),
+               op->in_opr2()->as_double_reg(),
+               op->in_opr3()->as_double_reg(),
+               op->result_opr()->as_double_reg());
+      return;
+    default:
+      ShouldNotReachHere();
+      break;
+  }
+
+  // Handle idiv & irem:
+
   Register Rdividend = op->in_opr1()->as_register();
   Register Rdivisor  = noreg;
   Register Rscratch  = op->in_opr3()->as_register();
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Tue Jun 27 15:50:09 2017 +0200
@@ -953,7 +953,29 @@
 }
 
 void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
-  fatal("FMA intrinsic is not implemented on this platform");
+  assert(x->number_of_arguments() == 3, "wrong type");
+  assert(UseFMA, "Needs FMA instructions support.");
+
+  LIRItem a(x->argument_at(0), this);
+  LIRItem b(x->argument_at(1), this);
+  LIRItem c(x->argument_at(2), this);
+
+  a.load_item();
+  b.load_item();
+  c.load_item();
+
+  LIR_Opr ina = a.result();
+  LIR_Opr inb = b.result();
+  LIR_Opr inc = c.result();
+  LIR_Opr res = rlock_result(x);
+
+  switch (x->id()) {
+    case vmIntrinsics::_fmaF: __ fmaf(ina, inb, inc, res); break;
+    case vmIntrinsics::_fmaD: __ fmad(ina, inb, inc, res); break;
+    default:
+      ShouldNotReachHere();
+      break;
+  }
 }
 
 void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
--- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Tue Jun 27 15:50:09 2017 +0200
@@ -33,7 +33,7 @@
 
 define_pd_global(bool, BackgroundCompilation,        true);
 define_pd_global(bool, CICompileOSR,                 true);
-define_pd_global(bool, InlineIntrinsics,             false);
+define_pd_global(bool, InlineIntrinsics,             true);
 define_pd_global(bool, PreferInterpreterNativeStubs, false);
 define_pd_global(bool, ProfileTraps,                 true);
 define_pd_global(bool, UseOnStackReplacement,        true);
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Tue Jun 27 15:50:09 2017 +0200
@@ -2627,6 +2627,33 @@
     __ fsqrt(FloatRegisterImpl::D, Fsrc, Fdst);
 %}
 
+
+
+enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+    __ fmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+    __ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
+%}
+
+
+
+
 enc_class fmovs (dflt_reg dst, dflt_reg src) %{
     MacroAssembler _masm(&cbuf);
 
@@ -4540,6 +4567,26 @@
     FDIV  : C(17);
 %}
 
+// Fused floating-point multiply-add float.
+pipe_class fmaF_regx4(regF dst, regF src1, regF src2, regF src3) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    src3  : E(read);
+    FM    : R;
+%}
+
+// Fused gloating-point multiply-add double.
+pipe_class fmaD_regx4(regD dst, regD src1, regD src2, regD src3) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    src3  : E(read);
+    FM    : R;
+%}
+
 // Floating Point Move/Negate/Abs Float
 pipe_class faddF_reg(regF dst, regF src) %{
     single_instruction;
@@ -7531,6 +7578,24 @@
   ins_pipe(fdivD_reg_reg);
 %}
 
+// Single precision fused floating-point multiply-add (d = a * b + c).
+instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF c (Binary a b)));
+  format %{ "fmadds $a,$b,$c,$dst\t# $dst = $a * $b + $c" %}
+  ins_encode(fmadds(dst, a, b, c));
+  ins_pipe(fmaF_regx4);
+%}
+
+// Double precision fused floating-point multiply-add (d = a * b + c).
+instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD c (Binary a b)));
+  format %{ "fmaddd $a,$b,$c,$dst\t# $dst = $a * $b + $c" %}
+  ins_encode(fmaddd(dst, a, b, c));
+  ins_pipe(fmaD_regx4);
+%}
+
 //----------Logical Instructions-----------------------------------------------
 // And Instructions
 // Register And
--- a/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp	Tue Jun 27 15:50:09 2017 +0200
@@ -153,13 +153,12 @@
     __ delayed()->srl( G4_scratch, 2, G4_scratch );
 
     __ bind(NextArg);
-
   }
 
   __ bind(done);
   __ ret();
-  __ delayed()->
-     restore(O0, 0, Lscratch);  // caller's Lscratch gets the result handler
+  __ delayed()->restore(O0, 0, Lscratch);  // caller's Lscratch gets the result handler
+
   return entry;
 }
 
@@ -177,7 +176,6 @@
   // returns verified_entry_point or NULL
   // we ignore it in any case
   __ ba_short(Lcontinue);
-
 }
 
 
@@ -196,7 +194,6 @@
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
   return entry;
-
 }
 
 void TemplateInterpreterGenerator::save_native_result(void) {
@@ -474,7 +471,6 @@
     __ delayed()->nop();
     __ bind(done);
   }
-
 }
 
 // Allocate monitor and lock method (asm interpreter)
@@ -590,7 +586,7 @@
 //   pop parameters from the callers stack by adjusting Lesp
 //   set O0 to Lesp
 //   compute X = (max_locals - num_parameters)
-//   bump SP up by X to accomadate the extra locals
+//   bump SP up by X to accommodate the extra locals
 //   compute X = max_expression_stack
 //               + vm_local_words
 //               + 16 words of register save area
@@ -688,7 +684,7 @@
   // 1) Increase caller's SP by for the extra local space needed:
   //    (check for overflow)
   //    Efficient implementation of xload/xstore bytecodes requires
-  //    that arguments and non-argument locals are in a contigously
+  //    that arguments and non-argument locals are in a contiguously
   //    addressable memory block => non-argument locals must be
   //    allocated in the caller's frame.
   //
@@ -782,7 +778,7 @@
     __ sub(Gframe_size, Glocals_size, Gframe_size);
 
     //
-    // bump SP to accomodate the extra locals
+    // bump SP to accommodate the extra locals
     //
     __ sub(SP, Glocals_size, SP);
   }
@@ -810,9 +806,9 @@
   Register mirror = LcpoolCache;
   __ load_mirror(mirror, Lmethod);
   __ st_ptr(mirror, FP, (frame::interpreter_frame_mirror_offset * wordSize) + STACK_BIAS);
-  __ get_constant_pool_cache( LcpoolCache );   // set LcpoolCache
+  __ get_constant_pool_cache(LcpoolCache);     // set LcpoolCache
   __ sub(FP, rounded_vm_local_words * BytesPerWord, Lmonitors ); // set Lmonitors
-  __ add( Lmonitors, STACK_BIAS, Lmonitors );   // Account for 64 bit stack bias
+  __ add(Lmonitors, STACK_BIAS, Lmonitors);    // Account for 64 bit stack bias
   __ sub(Lmonitors, BytesPerWord, Lesp);       // set Lesp
 
   // setup interpreter activation registers
@@ -984,7 +980,7 @@
       __ ldx( Gargs, 16, buf);
       __ lduw(Gargs, 24, crc);
       __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
-      __ add(buf ,offset, buf);
+      __ add(buf, offset, buf);
     }
 
     // Call the crc32 kernel
@@ -1057,8 +1053,58 @@
   return NULL;
 }
 
-// Not supported
-address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+/* Math routines only partially supported.
+ *
+ *   Providing support for fma (float/double) only.
+ */
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind)
+{
+  if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
+
+  address entry = __ pc();
+
+  switch (kind) {
+    case Interpreter::java_lang_math_fmaF:
+      if (UseFMA) {
+        // float .fma(float a, float b, float c)
+        const FloatRegister ra = F1;
+        const FloatRegister rb = F2;
+        const FloatRegister rc = F3;
+        const FloatRegister rd = F0; // Result.
+
+        __ ldf(FloatRegisterImpl::S, Gargs,  0, rc);
+        __ ldf(FloatRegisterImpl::S, Gargs,  8, rb);
+        __ ldf(FloatRegisterImpl::S, Gargs, 16, ra);
+
+        __ fmadd(FloatRegisterImpl::S, ra, rb, rc, rd);
+        __ retl();  // Result in F0 (rd).
+        __ delayed()->mov(O5_savedSP, SP);
+
+        return entry;
+      }
+      break;
+    case Interpreter::java_lang_math_fmaD:
+      if (UseFMA) {
+        // double .fma(double a, double b, double c)
+        const FloatRegister ra = F2; // D1
+        const FloatRegister rb = F4; // D2
+        const FloatRegister rc = F6; // D3
+        const FloatRegister rd = F0; // D0 Result.
+
+        __ ldf(FloatRegisterImpl::D, Gargs,  0, rc);
+        __ ldf(FloatRegisterImpl::D, Gargs, 16, rb);
+        __ ldf(FloatRegisterImpl::D, Gargs, 32, ra);
+
+        __ fmadd(FloatRegisterImpl::D, ra, rb, rc, rd);
+        __ retl();  // Result in D0 (rd).
+        __ delayed()->mov(O5_savedSP, SP);
+
+        return entry;
+      }
+      break;
+    default:
+      break;
+  }
   return NULL;
 }
 
@@ -1071,7 +1117,7 @@
   // Doing the banging earlier fails if the caller frame is not an interpreter
   // frame.
   // (Also, the exception throwing code expects to unlock any synchronized
-  // method receiever, so do the banging after locking the receiver.)
+  // method receiver, so do the banging after locking the receiver.)
 
   // Bang each page in the shadow zone. We can't assume it's been done for
   // an interpreter frame with greater than a page of locals, so each page
@@ -1112,8 +1158,7 @@
   // rethink these assertions - they can be simplified and shared (gri 2/25/2000)
 #ifdef ASSERT
   __ ld(G5_method, Method::access_flags_offset(), Gtmp1);
-  {
-    Label L;
+  { Label L;
     __ btst(JVM_ACC_NATIVE, Gtmp1);
     __ br(Assembler::notZero, false, Assembler::pt, L);
     __ delayed()->nop();
@@ -1362,7 +1407,7 @@
     //     didn't see any synchronization is progress, and escapes.
     __ set(_thread_in_native_trans, G3_scratch);
     __ st(G3_scratch, thread_state);
-    if(os::is_MP()) {
+    if (os::is_MP()) {
       if (UseMembar) {
         // Force this write out before the read below
         __ membar(Assembler::StoreLoad);
@@ -1425,8 +1470,7 @@
   // If we have an oop result store it where it will be safe for any further gc
   // until we return now that we've released the handle it might be protected by
 
-  {
-    Label no_oop, store_result;
+  { Label no_oop, store_result;
 
     __ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch);
     __ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop);
@@ -1484,8 +1528,7 @@
 
   // dispose of return address and remove activation
 #ifdef ASSERT
-  {
-    Label ok;
+  { Label ok;
     __ cmp_and_brx_short(I5_savedSP, FP, Assembler::greaterEqualUnsigned, Assembler::pt, ok);
     __ stop("bad I5_savedSP value");
     __ should_not_reach_here();
@@ -1495,15 +1538,12 @@
   __ jmp(Lscratch, 0);
   __ delayed()->nop();
 
-
   if (inc_counter) {
     // handle invocation counter overflow
     __ bind(invocation_counter_overflow);
     generate_counter_overflow(Lcontinue);
   }
 
-
-
   return entry;
 }
 
@@ -1533,8 +1573,7 @@
   // rethink these assertions - they can be simplified and shared (gri 2/25/2000)
 #ifdef ASSERT
   __ ld(G5_method, Method::access_flags_offset(), Gtmp1);
-  {
-    Label L;
+  { Label L;
     __ btst(JVM_ACC_NATIVE, Gtmp1);
     __ br(Assembler::zero, false, Assembler::pt, L);
     __ delayed()->nop();
@@ -1666,7 +1705,6 @@
     generate_counter_overflow(Lcontinue);
   }
 
-
   return entry;
 }
 
@@ -1786,8 +1824,7 @@
   }
 
 #if INCLUDE_JVMTI
-  {
-    Label L_done;
+  { Label L_done;
 
     __ ldub(Address(Lbcp, 0), G1_scratch);  // Load current bytecode
     __ cmp_and_br_short(G1_scratch, Bytecodes::_invokestatic, Assembler::notEqual, Assembler::pn, L_done);
@@ -1827,7 +1864,7 @@
   __ get_vm_result(Oexception);
   __ verify_oop(Oexception);
 
-    const int return_reg_adjustment = frame::pc_return_offset;
+  const int return_reg_adjustment = frame::pc_return_offset;
   Address issuing_pc_addr(I7, return_reg_adjustment);
 
   // We are done with this activation frame; find out where to go next.
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Tue Jun 27 15:50:09 2017 +0200
@@ -317,7 +317,11 @@
     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
   }
 
-  if (UseFMA) {
+  if (has_fmaf()) {
+    if (FLAG_IS_DEFAULT(UseFMA)) {
+      UseFMA = true;
+    }
+  } else if (UseFMA) {
     warning("FMA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseFMA, false);
   }
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp	Tue Jun 27 15:46:16 2017 +0200
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp	Tue Jun 27 15:50:09 2017 +0200
@@ -248,7 +248,7 @@
   static jint    verify_oop_count()                        { return _verify_oop_count; }
   static jint*   verify_oop_count_addr()                   { return &_verify_oop_count; }
   // a subroutine for debugging the GC
-  static address verify_oop_subroutine_entry_address()    { return (address)&_verify_oop_subroutine_entry; }
+  static address verify_oop_subroutine_entry_address()     { return (address)&_verify_oop_subroutine_entry; }
 
   static address catch_exception_entry()                   { return _catch_exception_entry; }
 
@@ -335,8 +335,8 @@
   static address checkcast_arraycopy(bool dest_uninitialized = false) {
     return dest_uninitialized ? _checkcast_arraycopy_uninit : _checkcast_arraycopy;
   }
-  static address unsafe_arraycopy()        { return _unsafe_arraycopy; }
-  static address generic_arraycopy()       { return _generic_arraycopy; }
+  static address unsafe_arraycopy()    { return _unsafe_arraycopy; }
+  static address generic_arraycopy()   { return _generic_arraycopy; }
 
   static address jbyte_fill()          { return _jbyte_fill; }
   static address jshort_fill()         { return _jshort_fill; }
@@ -349,8 +349,8 @@
   static address aescrypt_decryptBlock()                { return _aescrypt_decryptBlock; }
   static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
-  static address counterMode_AESCrypt() { return _counterMode_AESCrypt; }
-  static address ghash_processBlocks() { return _ghash_processBlocks; }
+  static address counterMode_AESCrypt()  { return _counterMode_AESCrypt; }
+  static address ghash_processBlocks()   { return _ghash_processBlocks; }
 
   static address sha1_implCompress()     { return _sha1_implCompress; }
   static address sha1_implCompressMB()   { return _sha1_implCompressMB; }
@@ -366,9 +366,9 @@
   static address updateBytesCRC32C()   { return _updateBytesCRC32C; }
   static address updateBytesAdler32()  { return _updateBytesAdler32; }
 
-  static address multiplyToLen()       {return _multiplyToLen; }
-  static address squareToLen()         {return _squareToLen; }
-  static address mulAdd()              {return _mulAdd; }
+  static address multiplyToLen()       { return _multiplyToLen; }
+  static address squareToLen()         { return _squareToLen; }
+  static address mulAdd()              { return _mulAdd; }
   static address montgomeryMultiply()  { return _montgomeryMultiply; }
   static address montgomerySquare()    { return _montgomerySquare; }
 
@@ -376,7 +376,7 @@
 
   static address dexp()                { return _dexp; }
   static address dlog()                { return _dlog; }
-  static address dlog10()                { return _dlog10; }
+  static address dlog10()              { return _dlog10; }
   static address dpow()                { return _dpow; }
   static address dsin()                { return _dsin; }
   static address dcos()                { return _dcos; }
@@ -387,7 +387,7 @@
 
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
 
-  static address zero_aligned_words()   { return _zero_aligned_words; }
+  static address zero_aligned_words()  { return _zero_aligned_words; }
 
   static double  intrinsic_log10(double d) {
     assert(_intrinsic_log10 != NULL, "must be defined");