--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Mon Mar 13 19:59:41 2017 +0000
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Tue Mar 14 14:07:24 2017 -0400
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -4092,7 +4092,7 @@
* @param table register pointing to CRC table
*/
void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
- Register data, bool loopAlignment, bool invertCRC) {
+ Register data, bool loopAlignment) {
assert_different_registers(crc, buf, len, table, data);
Label L_mainLoop, L_done;
@@ -4103,10 +4103,6 @@
clrldi_(len, len, 32); // Enforce 32 bit. Anything to do?
beq(CCR0, L_done);
- if (invertCRC) {
- nand(crc, crc, crc); // ~c
- }
-
mtctr(len);
align(mainLoop_alignment);
BIND(L_mainLoop);
@@ -4115,10 +4111,6 @@
update_byte_crc32(crc, data, table);
bdnz(L_mainLoop); // Iterate.
- if (invertCRC) {
- nand(crc, crc, crc); // ~c
- }
-
bind(L_done);
}
@@ -4175,7 +4167,8 @@
*/
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3,
- Register tc0, Register tc1, Register tc2, Register tc3) {
+ Register tc0, Register tc1, Register tc2, Register tc3,
+ bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
@@ -4189,14 +4182,16 @@
const int complexThreshold = 2*mainLoop_stepping;
// Don't test for len <= 0 here. This pathological case should not occur anyway.
- // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
- // The situation itself is detected and handled correctly by the conditional branches
- // following aghi(len, -stepping) and aghi(len, +stepping).
+ // Optimizing for it by adding a test and a branch would waste CPU cycles in all
+ // well-behaved cases. A zero length is detected and handled within
+ // update_byteLoop_crc32, which tests the 32-bit length for zero before its loop.
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
BLOCK_COMMENT("kernel_crc32_2word {");
- nand(crc, crc, crc); // ~c
+ if (invertCRC) {
+ nand(crc, crc, crc); // 1s complement of crc
+ }
// Check for short (<mainLoop_stepping) buffer.
cmpdi(CCR0, len, complexThreshold);
@@ -4217,7 +4212,7 @@
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
}
- update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
+ update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
}
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
@@ -4253,9 +4248,11 @@
// Process last few (<complexThreshold) bytes of buffer.
BIND(L_tail);
- update_byteLoop_crc32(crc, buf, len, table, data, false, false);
-
- nand(crc, crc, crc); // ~c
+ update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+ if (invertCRC) {
+ nand(crc, crc, crc); // 1s complement of crc
+ }
BLOCK_COMMENT("} kernel_crc32_2word");
}
@@ -4269,7 +4266,8 @@
*/
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3,
- Register tc0, Register tc1, Register tc2, Register tc3) {
+ Register tc0, Register tc1, Register tc2, Register tc3,
+ bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
@@ -4283,14 +4281,16 @@
const int complexThreshold = 2*mainLoop_stepping;
// Don't test for len <= 0 here. This pathological case should not occur anyway.
- // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
- // The situation itself is detected and handled correctly by the conditional branches
- // following aghi(len, -stepping) and aghi(len, +stepping).
+ // Optimizing for it by adding a test and a branch would waste CPU cycles in all
+ // well-behaved cases. A zero length is detected and handled within
+ // update_byteLoop_crc32, which tests the 32-bit length for zero before its loop.
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
BLOCK_COMMENT("kernel_crc32_1word {");
- nand(crc, crc, crc); // ~c
+ if (invertCRC) {
+ nand(crc, crc, crc); // 1s complement of crc
+ }
// Check for short (<mainLoop_stepping) buffer.
cmpdi(CCR0, len, complexThreshold);
@@ -4311,7 +4311,7 @@
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
}
- update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
+ update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
}
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
@@ -4346,9 +4346,11 @@
// Process last few (<complexThreshold) bytes of buffer.
BIND(L_tail);
- update_byteLoop_crc32(crc, buf, len, table, data, false, false);
-
- nand(crc, crc, crc); // ~c
+ update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+ if (invertCRC) {
+ nand(crc, crc, crc); // 1s complement of crc
+ }
BLOCK_COMMENT("} kernel_crc32_1word");
}
@@ -4361,16 +4363,24 @@
* Uses R7_ARG5, R8_ARG6 as work registers.
*/
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
- Register t0, Register t1, Register t2, Register t3) {
+ Register t0, Register t1, Register t2, Register t3,
+ bool invertCRC) { // invertCRC: complement crc both before and after the byte loop.
assert_different_registers(crc, buf, len, table);
Register data = t0; // Holds the current byte to be folded into crc.
BLOCK_COMMENT("kernel_crc32_1byte {");
+ if (invertCRC) { // Complement the incoming crc only when the caller requests it.
+ nand(crc, crc, crc); // 1s complement of crc (nand of the register with itself)
+ }
+
// Process all bytes in a single-byte loop.
- update_byteLoop_crc32(crc, buf, len, table, data, true, true);
-
+ update_byteLoop_crc32(crc, buf, len, table, data, true); // Final argument is loopAlignment; crc inversion is done here, not inside the loop.
+
+ if (invertCRC) { // Complement the resulting crc, mirroring the step before the loop.
+ nand(crc, crc, crc); // 1s complement of crc
+ }
BLOCK_COMMENT("} kernel_crc32_1byte");
}
@@ -4388,7 +4398,8 @@
*/
void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
Register constants, Register barretConstants,
- Register t0, Register t1, Register t2, Register t3, Register t4) {
+ Register t0, Register t1, Register t2, Register t3, Register t4,
+ bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_alignedHead, L_tail, L_alignTail, L_start, L_end;
@@ -4406,13 +4417,15 @@
Register tc0 = t4;
Register tc1 = constants;
Register tc2 = barretConstants;
- kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table);
+ kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
b(L_end);
BIND(L_start);
// 2. ~c
- nand(crc, crc, crc);
+ if (invertCRC) {
+ nand(crc, crc, crc); // 1s complement of crc
+ }
// 3. calculate from 0 to first 128bit-aligned address
clrldi_(prealign, buf, 57);
@@ -4421,7 +4434,7 @@
subfic(prealign, prealign, 128);
subf(len, prealign, len);
- update_byteLoop_crc32(crc, buf, prealign, table, t2, false, false);
+ update_byteLoop_crc32(crc, buf, prealign, table, t2, false);
// 4. calculate from first 128bit-aligned address to last 128bit-aligned address
BIND(L_alignedHead);
@@ -4436,12 +4449,14 @@
cmpdi(CCR0, postalign, 0);
beq(CCR0, L_tail);
- update_byteLoop_crc32(crc, buf, postalign, table, t2, false, false);
+ update_byteLoop_crc32(crc, buf, postalign, table, t2, false);
BIND(L_tail);
// 6. ~c
- nand(crc, crc, crc);
+ if (invertCRC) {
+ nand(crc, crc, crc); // 1s complement of crc
+ }
BIND(L_end);
@@ -4933,16 +4948,35 @@
offsetInt -= 8; ld(R31, offsetInt, R1_SP);
}
-void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
+void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, bool invertCRC) {
assert_different_registers(crc, buf, /* len, not used!! */ table, tmp);
BLOCK_COMMENT("kernel_crc32_singleByte:");
- nand(crc, crc, crc); // ~c
-
- lbz(tmp, 0, buf); // Byte from buffer, zero-extended.
+ if (invertCRC) { // Complement the incoming crc only when the caller requests it.
+ nand(crc, crc, crc); // 1s complement of crc (nand of the register with itself)
+ }
+
+ lbz(tmp, 0, buf); // Byte from buffer (offset 0), zero-extended into tmp.
update_byte_crc32(crc, tmp, table);
- nand(crc, crc, crc); // ~c
+ if (invertCRC) { // Complement the updated crc, mirroring the step before the update.
+ nand(crc, crc, crc); // 1s complement of crc
+ }
+}
+
+void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, bool invertCRC) {
+ assert_different_registers(crc, val, table); // crc, val and table must be three distinct registers.
+
+ BLOCK_COMMENT("kernel_crc32_singleByteReg:");
+ if (invertCRC) { // Complement the incoming crc only when the caller requests it.
+ nand(crc, crc, crc); // 1s complement of crc (nand of the register with itself)
+ }
+
+ update_byte_crc32(crc, val, table); // Unlike kernel_crc32_singleByte, val is passed in a register; nothing is loaded from a buffer here.
+
+ if (invertCRC) { // Complement the updated crc, mirroring the step before the update.
+ nand(crc, crc, crc); // 1s complement of crc
+ }
}
// dest_lo += src1 + src2