hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp
changeset 22861 f5c393d456fc
parent 22852 1063026e8cee
child 22867 309bcf262a19
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Tue Dec 10 14:29:43 2013 +0100
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Dec 11 00:06:11 2013 +0100
@@ -97,8 +97,10 @@
   }
 }
 
-void MacroAssembler::align(int modulus) {
-  while (offset() % modulus != 0) nop();
+void MacroAssembler::align(int modulus, int max, int rem) {
+  int padding = (rem + modulus - (offset() % modulus)) % modulus;
+  if (padding > max) return;
+  for (int c = (padding >> 2); c > 0; --c) { nop(); }
 }
 
 // Issue instructions that calculate given TOC from global TOC.
@@ -186,16 +188,25 @@
 
 #ifdef _LP64
 // Patch compressed oops or klass constants.
+// Assembler sequence is
+// 1) compressed oops:
+//    lis  rx = const.hi
+//    ori rx = rx | const.lo
+// 2) compressed klass:
+//    lis  rx = const.hi
+//    clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
+//    ori rx = rx | const.lo
+// Clrldi will be passed by.
 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
   assert(UseCompressedOops, "Should only patch compressed oops");
 
   const address inst2_addr = a;
   const int inst2 = *(int *)inst2_addr;
 
-  // The relocation points to the second instruction, the addi,
-  // and the addi reads and writes the same register dst.
-  const int dst = inv_rt_field(inst2);
-  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
+  // The relocation points to the second instruction, the ori,
+  // and the ori reads and writes the same register dst.
+  const int dst = inv_rta_field(inst2);
+  assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
   // Now, find the preceding addis which writes to dst.
   int inst1 = 0;
   address inst1_addr = inst2_addr - BytesPerInstWord;
@@ -210,8 +221,9 @@
   int xc = (data >> 16) & 0xffff;
   int xd = (data >>  0) & 0xffff;
 
-  set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2
+  set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
   set_imm((int *)inst2_addr, (short)(xd));
+
   return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
 }
 
@@ -222,10 +234,10 @@
   const address inst2_addr = a;
   const int inst2 = *(int *)inst2_addr;
 
-  // The relocation points to the second instruction, the addi,
-  // and the addi reads and writes the same register dst.
-  const int dst = inv_rt_field(inst2);
-  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
+  // The relocation points to the second instruction, the ori,
+  // and the ori reads and writes the same register dst.
+  const int dst = inv_rta_field(inst2);
+  assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
   // Now, find the preceding lis which writes to dst.
   int inst1 = 0;
   address inst1_addr = inst2_addr - BytesPerInstWord;
@@ -238,8 +250,9 @@
   }
   assert(inst1_found, "inst is not lis");
 
-  uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff));
-  uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16);
+  uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
+  uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
+
   return (int) (xl | xh);
 }
 #endif // _LP64
@@ -252,13 +265,10 @@
   // FIXME: We should insert relocation information for oops at the constant
   // pool entries instead of inserting it at the loads; patching of a constant
   // pool entry should be less expensive.
-  Unimplemented();
-  if (false) {
-    address oop_address = address_constant((address)a.value(), RelocationHolder::none);
-    // Relocate at the pc of the load.
-    relocate(a.rspec());
-    toc_offset = (int)(oop_address - code()->consts()->start());
-  }
+  address oop_address = address_constant((address)a.value(), RelocationHolder::none);
+  // Relocate at the pc of the load.
+  relocate(a.rspec());
+  toc_offset = (int)(oop_address - code()->consts()->start());
   ld_largeoffset_unchecked(dst, toc_offset, toc, true);
 }
 
@@ -532,7 +542,7 @@
       masm.b(dest);
     }
   }
-  ICache::invalidate_range(instruction_addr, code_size);
+  ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
 }
 
 // Emit a NOT mt-safe patchable 64 bit absolute call/jump.
@@ -673,7 +683,7 @@
   CodeBuffer buf(instruction_addr, code_size);
   MacroAssembler masm(&buf);
   masm.bxx64_patchable(dest, relocInfo::none, link);
-  ICache::invalidate_range(instruction_addr, code_size);
+  ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
 }
 
 // Get dest address of a bxx64_patchable instruction.
@@ -964,6 +974,14 @@
                        /*load env=*/true);
 }
 
+address MacroAssembler::call_c_and_return_to_caller(Register fd) {
+  return branch_to(fd, /*and_link=*/false,
+                       /*save toc=*/false,
+                       /*restore toc=*/false,
+                       /*load toc=*/true,
+                       /*load env=*/true);
+}
+
 address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
   if (rt != relocInfo::none) {
     // this call needs to be relocatable
@@ -2315,7 +2333,7 @@
   if (last_Java_pc != noreg)
     std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
 
-  // set last_Java_sp last
+  // Set last_Java_sp last.
   std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
 }
 
@@ -2454,6 +2472,57 @@
   }
 }
 
+// Clear Array
+// Kills both input registers. tmp == R0 is allowed.
+void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
+  // Procedure for large arrays (uses data cache block zero instruction).
+    Label startloop, fast, fastloop, small_rest, restloop, done;
+    const int cl_size         = VM_Version::get_cache_line_size(),
+              cl_dwords       = cl_size>>3,
+              cl_dw_addr_bits = exact_log2(cl_dwords),
+              dcbz_min        = 1;                     // Min count of dcbz executions, needs to be >0.
+
+//2:
+    cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
+    blt(CCR1, small_rest);                                      // Too small.
+    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits);           // Extract dword offset within first cache line.
+    beq(CCR0, fast);                                            // Already 128byte aligned.
+
+    subfic(tmp, tmp, cl_dwords);
+    mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
+    subf(cnt_dwords, tmp, cnt_dwords); // rest.
+    li(tmp, 0);
+//10:
+  bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
+    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
+    addi(base_ptr, base_ptr, 8);
+    bdnz(startloop);
+//13:
+  bind(fast);                                  // Clear 128byte blocks.
+    srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
+    andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
+    mtctr(tmp);                                // Load counter.
+//16:
+  bind(fastloop);
+    dcbz(base_ptr);                    // Clear 128byte aligned block.
+    addi(base_ptr, base_ptr, cl_size);
+    bdnz(fastloop);
+    if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
+//20:
+  bind(small_rest);
+    cmpdi(CCR0, cnt_dwords, 0);        // size 0?
+    beq(CCR0, done);                   // rest == 0
+    li(tmp, 0);
+    mtctr(cnt_dwords);                 // Load counter.
+//24:
+  bind(restloop);                      // Clear rest.
+    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
+    addi(base_ptr, base_ptr, 8);
+    bdnz(restloop);
+//27:
+  bind(done);
+}
+
 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
 
 // Search for a single jchar in an jchar[].
@@ -2926,12 +2995,11 @@
   if (!VerifyOops) {
     return;
   }
-  // will be preserved.
+  // Will be preserved.
   Register tmp = R11;
   assert(oop != tmp, "precondition");
   unsigned int nbytes_save = 10*8; // 10 volatile gprs
-  address/* FunctionDescriptor** */fd =
-    StubRoutines::verify_oop_subroutine_entry_address();
+  address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
   // save tmp
   mr(R0, tmp);
   // kill tmp