hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp
changeset 46327 91576389a517
parent 43969 ae5c415036b0
parent 46317 5f88747fe29e
child 46338 e84b501fa52e
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Mon Mar 13 19:59:41 2017 +0000
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Tue Mar 14 14:07:24 2017 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -4092,7 +4092,7 @@
  * @param table register pointing to CRC table
  */
 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
-                                           Register data, bool loopAlignment, bool invertCRC) {
+                                           Register data, bool loopAlignment) {
   assert_different_registers(crc, buf, len, table, data);
 
   Label L_mainLoop, L_done;
@@ -4103,10 +4103,6 @@
   clrldi_(len, len, 32);                         // Enforce 32 bit. Anything to do?
   beq(CCR0, L_done);
 
-  if (invertCRC) {
-    nand(crc, crc, crc);                         // ~c
-  }
-
   mtctr(len);
   align(mainLoop_alignment);
   BIND(L_mainLoop);
@@ -4115,10 +4111,6 @@
     update_byte_crc32(crc, data, table);
     bdnz(L_mainLoop);                            // Iterate.
 
-  if (invertCRC) {
-    nand(crc, crc, crc);                         // ~c
-  }
-
   bind(L_done);
 }
 
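The inversion that used to live inside update_byteLoop_crc32 is hoisted out: the byte loop now always works on the raw (pre-inverted) CRC register, and only the outermost kernel entry points apply the CRC-32 one's-complement convention (note that nand(crc, crc, crc) computes ~(crc & crc), i.e. ~crc). Below is a minimal C++ reference model of that convention; the names are illustrative, not HotSpot identifiers, and the table entry is computed on the fly where the assembly indexes a precomputed 256-entry table.

#include <cstdint>
#include <cstddef>

// One byte step of the reflected CRC-32 (polynomial 0xEDB88320). The inner
// loop derives the table entry that update_byte_crc32 would look up.
static uint32_t byte_step(uint32_t crc, uint8_t b) {
  uint32_t c = (crc ^ b) & 0xff;
  for (int k = 0; k < 8; ++k)
    c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);
  return c ^ (crc >> 8);
}

// Counterpart of a kernel entry point: the pre/post inversion runs only when
// invertCRC is true, mirroring the nand(crc, crc, crc) pairs in the assembly.
uint32_t crc32_kernel(uint32_t crc, const uint8_t* buf, size_t len,
                      bool invertCRC) {
  if (invertCRC) crc = ~crc;            // public value -> raw register form
  while (len-- > 0) crc = byte_step(crc, *buf++);
  if (invertCRC) crc = ~crc;            // raw register form -> public value
  return crc;
}
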
@@ -4175,7 +4167,8 @@
  */
 void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                         Register t0,  Register t1,  Register t2,  Register t3,
-                                        Register tc0, Register tc1, Register tc2, Register tc3) {
+                                        Register tc0, Register tc1, Register tc2, Register tc3,
+                                        bool invertCRC) {
   assert_different_registers(crc, buf, len, table);
 
   Label L_mainLoop, L_tail;
@@ -4189,14 +4182,16 @@
   const int complexThreshold   = 2*mainLoop_stepping;
 
   // Don't test for len <= 0 here. This pathological case should not occur anyway.
-  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
-  // The situation itself is detected and handled correctly by the conditional branches
-  // following  aghi(len, -stepping) and aghi(len, +stepping).
+  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
+  // in all well-behaved cases. The situation itself is detected and handled correctly
+  // within update_byteLoop_crc32.
   assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
 
   BLOCK_COMMENT("kernel_crc32_2word {");
 
-  nand(crc, crc, crc);                           // ~c
+  if (invertCRC) {
+    nand(crc, crc, crc);                      // 1's complement of crc
+  }
 
   // Check for short (<mainLoop_stepping) buffer.
   cmpdi(CCR0, len, complexThreshold);
@@ -4217,7 +4212,7 @@
       blt(CCR0, L_tail);                         // For less than one mainloop_stepping left, do only tail processing
       mr(len, tmp);                              // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
     }
-    update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
+    update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
   }
 
   srdi(tmp2, len, log_stepping);                 // #iterations for mainLoop
@@ -4253,9 +4248,11 @@
 
   // Process last few (<complexThreshold) bytes of buffer.
   BIND(L_tail);
-  update_byteLoop_crc32(crc, buf, len, table, data, false, false);
-
-  nand(crc, crc, crc);                           // ~c
+  update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+  if (invertCRC) {
+    nand(crc, crc, crc);                      // 1's complement of crc
+  }
   BLOCK_COMMENT("} kernel_crc32_2word");
 }
 
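With invertCRC as a caller-supplied flag, the kernels compose: internal calls (the head and tail byte loops above, or one kernel delegating to another) pass invertCRC == false, so the inversion happens exactly once at the outermost level. A small self-contained C++ check of that algebra, using the same reflected CRC-32 reference model as the sketch above (illustrative code, not HotSpot's):

#include <cstdint>
#include <cstddef>
#include <cassert>

static uint32_t byte_step(uint32_t crc, uint8_t b) {
  uint32_t c = (crc ^ b) & 0xff;
  for (int k = 0; k < 8; ++k)
    c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);
  return c ^ (crc >> 8);
}

// invertCRC == false behavior: fold bytes into the raw CRC, no inversion.
static uint32_t crc32_raw(uint32_t crc, const uint8_t* p, size_t n) {
  while (n-- > 0) crc = byte_step(crc, *p++);
  return crc;
}

int main() {
  const uint8_t data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  // One-shot: invert once around the whole buffer (invertCRC == true).
  uint32_t full = ~crc32_raw(~0u, data, sizeof data);
  // Chunked: pre-invert once, run two partial updates with no inversion,
  // then post-invert once. Same shape as the kernel's helper calls.
  uint32_t part = ~0u;
  part = crc32_raw(part, data, 4);
  part = crc32_raw(part, data + 4, sizeof data - 4);
  part = ~part;
  assert(full == part);
  return 0;
}
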
@@ -4269,7 +4266,8 @@
  */
 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                                         Register t0,  Register t1,  Register t2,  Register t3,
-                                        Register tc0, Register tc1, Register tc2, Register tc3) {
+                                        Register tc0, Register tc1, Register tc2, Register tc3,
+                                        bool invertCRC) {
   assert_different_registers(crc, buf, len, table);
 
   Label L_mainLoop, L_tail;
@@ -4283,14 +4281,16 @@
   const int complexThreshold   = 2*mainLoop_stepping;
 
   // Don't test for len <= 0 here. This pathological case should not occur anyway.
-  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
-  // The situation itself is detected and handled correctly by the conditional branches
-  // following  aghi(len, -stepping) and aghi(len, +stepping).
+  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
+  // in all well-behaved cases. The situation itself is detected and handled correctly
+  // within update_byteLoop_crc32.
   assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
 
   BLOCK_COMMENT("kernel_crc32_1word {");
 
-  nand(crc, crc, crc);                           // ~c
+  if (invertCRC) {
+    nand(crc, crc, crc);                      // 1's complement of crc
+  }
 
   // Check for short (<mainLoop_stepping) buffer.
   cmpdi(CCR0, len, complexThreshold);
@@ -4311,7 +4311,7 @@
       blt(CCR0, L_tail);                         // For less than one mainloop_stepping left, do only tail processing
       mr(len, tmp);                              // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
     }
-    update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
+    update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
   }
 
   srdi(tmp2, len, log_stepping);                 // #iterations for mainLoop
@@ -4346,9 +4346,11 @@
 
   // Process last few (<complexThreshold) bytes of buffer.
   BIND(L_tail);
-  update_byteLoop_crc32(crc, buf, len, table, data, false, false);
-
-  nand(crc, crc, crc);                           // ~c
+  update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+  if (invertCRC) {
+    nand(crc, crc, crc);                      // 1's complement of crc
+  }
   BLOCK_COMMENT("} kernel_crc32_1word");
 }
 
@@ -4361,16 +4363,24 @@
  * Uses R7_ARG5, R8_ARG6 as work registers.
  */
 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
-                                        Register t0,  Register t1,  Register t2,  Register t3) {
+                                        Register t0,  Register t1,  Register t2,  Register t3,
+                                        bool invertCRC) {
   assert_different_registers(crc, buf, len, table);
 
   Register  data = t0;                   // Holds the current byte to be folded into crc.
 
   BLOCK_COMMENT("kernel_crc32_1byte {");
 
+  if (invertCRC) {
+    nand(crc, crc, crc);                      // 1's complement of crc
+  }
+
   // Process all bytes in a single-byte loop.
-  update_byteLoop_crc32(crc, buf, len, table, data, true, true);
-
+  update_byteLoop_crc32(crc, buf, len, table, data, true);
+
+  if (invertCRC) {
+    nand(crc, crc, crc);                      // 1's complement of crc
+  }
   BLOCK_COMMENT("} kernel_crc32_1byte");
 }
 
@@ -4388,7 +4398,8 @@
  */
 void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
                                                 Register constants,  Register barretConstants,
-                                                Register t0,  Register t1, Register t2, Register t3, Register t4) {
+                                                Register t0,  Register t1, Register t2, Register t3, Register t4,
+                                                bool invertCRC) {
   assert_different_registers(crc, buf, len, table);
 
   Label L_alignedHead, L_tail, L_alignTail, L_start, L_end;
@@ -4406,13 +4417,15 @@
     Register tc0 = t4;
     Register tc1 = constants;
     Register tc2 = barretConstants;
-    kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table);
+    kernel_crc32_1word(crc, buf, len, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
     b(L_end);
 
   BIND(L_start);
 
     // 2. ~c
-    nand(crc, crc, crc);
+    if (invertCRC) {
+      nand(crc, crc, crc);                      // 1's complement of crc
+    }
 
     // 3. calculate from 0 to first 128bit-aligned address
     clrldi_(prealign, buf, 57);
@@ -4421,7 +4434,7 @@
     subfic(prealign, prealign, 128);
 
     subf(len, prealign, len);
-    update_byteLoop_crc32(crc, buf, prealign, table, t2, false, false);
+    update_byteLoop_crc32(crc, buf, prealign, table, t2, false);
 
     // 4. calculate from first 128bit-aligned address to last 128bit-aligned address
     BIND(L_alignedHead);
@@ -4436,12 +4449,14 @@
     cmpdi(CCR0, postalign, 0);
     beq(CCR0, L_tail);
 
-    update_byteLoop_crc32(crc, buf, postalign, table, t2, false, false);
+    update_byteLoop_crc32(crc, buf, postalign, table, t2, false);
 
     BIND(L_tail);
 
     // 6. ~c
-    nand(crc, crc, crc);
+    if (invertCRC) {
+      nand(crc, crc, crc);                      // 1's complement of crc
+    }
 
   BIND(L_end);
 
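Steps 2 through 6 wrap the vector loop in the same pattern: optional inversion, a byte loop up to the first aligned address, the aligned middle section, a byte loop for the unaligned tail, and the optional closing inversion. Going by clrldi_(prealign, buf, 57), which keeps the low seven bits of the address, the alignment granularity is 128 bytes (the "128bit" wording in the step comments appears to understate it). A hedged C++ sketch of the split, assuming the vector loop consumes whole 128-byte blocks:

#include <cstdint>
#include <cstddef>

// Hypothetical helper mirroring steps 3-5: how many bytes go to the scalar
// head loop, the aligned vector middle, and the scalar tail loop.
struct Split { size_t head, middle, tail; };

Split split128(uintptr_t buf, size_t len) {
  // clrldi_(prealign, buf, 57) yields buf % 128; subfic(prealign, prealign, 128)
  // then forms 128 - (buf % 128). The already-aligned case branches past both.
  size_t head = (128 - (buf & 127)) & 127;        // 0 if buf is already aligned
  if (head > len) head = len;                     // degenerate short buffers
  size_t middle = (len - head) & ~(size_t)127;    // whole aligned blocks (assumed)
  size_t tail = len - head - middle;              // leftover, done byte by byte
  return Split{head, middle, tail};
}
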
@@ -4933,16 +4948,35 @@
   offsetInt -= 8;  ld(R31, offsetInt, R1_SP);
 }
 
-void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
+void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, bool invertCRC) {
   assert_different_registers(crc, buf, /* len,  not used!! */ table, tmp);
 
   BLOCK_COMMENT("kernel_crc32_singleByte:");
-  nand(crc, crc, crc);       // ~c
-
-  lbz(tmp, 0, buf);          // Byte from buffer, zero-extended.
+  if (invertCRC) {
+    nand(crc, crc, crc);                // 1's complement of crc
+  }
+
+  lbz(tmp, 0, buf);                     // Byte from buffer, zero-extended.
   update_byte_crc32(crc, tmp, table);
 
-  nand(crc, crc, crc);       // ~c
+  if (invertCRC) {
+    nand(crc, crc, crc);                // 1's complement of crc
+  }
+}
+
+void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, bool invertCRC) {
+  assert_different_registers(crc, val, table);
+
+  BLOCK_COMMENT("kernel_crc32_singleByteReg:");
+  if (invertCRC) {
+    nand(crc, crc, crc);                // 1's complement of crc
+  }
+
+  update_byte_crc32(crc, val, table);
+
+  if (invertCRC) {
+    nand(crc, crc, crc);                // 1's complement of crc
+  }
 }
 
 // dest_lo += src1 + src2
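kernel_crc32_singleByteReg is the new variant: the operand already sits in a register (val) instead of being loaded from buf, so it needs neither a memory access nor a len register; presumably this suits callers that receive the byte as an argument, such as an update(int)-style intrinsic path. Its scalar equivalent, under the same reference model as the earlier sketches (illustrative and self-contained, not HotSpot code):

#include <cstdint>

// Fold one byte, already in a register, into crc. The inner loop computes the
// table entry that the assembly fetches via update_byte_crc32.
static uint32_t byte_step(uint32_t crc, uint8_t b) {
  uint32_t c = (crc ^ b) & 0xff;
  for (int k = 0; k < 8; ++k)
    c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);
  return c ^ (crc >> 8);
}

uint32_t crc32_single_byte_reg(uint32_t crc, uint8_t val, bool invertCRC) {
  if (invertCRC) crc = ~crc;   // nand(crc, crc, crc) on entry
  crc = byte_step(crc, val);
  if (invertCRC) crc = ~crc;   // nand(crc, crc, crc) on exit
  return crc;
}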