8193443: [s390]: EncodeISOArray generates wrong vector code
Reviewed-by: goetz, mdoerr
--- a/src/hotspot/cpu/s390/assembler_s390.hpp Wed Dec 13 21:41:50 2017 -0800
+++ b/src/hotspot/cpu/s390/assembler_s390.hpp Thu Dec 14 12:02:16 2017 +0100
@@ -1459,6 +1459,10 @@
bcondLogNotZero = bcondLogNotZero_Carry | bcondLogNotZero_NoCarry,
bcondLogCarry = bcondLogZero_Carry | bcondLogNotZero_Carry,
bcondLogBorrow = /* bcondLogZero_Borrow | */ bcondLogNotZero_Borrow,
+ // Vector compare instructions
+ bcondVAlltrue = 8, // All vector elements evaluate true
+ bcondVMixed = 4, // Some vector elements evaluate true, some false
+ bcondVAllfalse = 1, // All vector elements evaluate false
// string search instructions
bcondFound = 4,
bcondNotFound = 2,
@@ -3022,6 +3026,12 @@
inline void z_btrue( Label& L);
inline void z_bfalse(Label& L);
+ inline void z_bvat(Label& L); // all true
+ inline void z_bvnt(Label& L); // not all true (mixed or all false)
+ inline void z_bvmix(Label& L); // mixed true and false
+ inline void z_bvaf(Label& L); // not all false (mixed or all true)
+ inline void z_bvnf(Label& L); // all false
+
inline void z_brno( Label& L);
--- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp Wed Dec 13 21:41:50 2017 -0800
+++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp Thu Dec 14 12:02:16 2017 +0100
@@ -1315,23 +1315,28 @@
inline void Assembler::z_exrl(Register r1, Label& L) { z_exrl(r1, target(L)); } // z10
inline void Assembler::z_larl(Register r1, Label& L) { z_larl(r1, target(L)); }
-inline void Assembler::z_bru( Label& L) { z_brc(bcondAlways,target(L)); }
-inline void Assembler::z_brul( Label& L) { z_brcl(bcondAlways,target(L)); }
-inline void Assembler::z_brul( address a) { z_brcl(bcondAlways,a); }
-inline void Assembler::z_brh( Label& L) { z_brc(bcondHigh,target(L)); }
-inline void Assembler::z_brl( Label& L) { z_brc(bcondLow,target(L)); }
-inline void Assembler::z_bre( Label& L) { z_brc(bcondEqual,target(L)); }
-inline void Assembler::z_brnh( Label& L) { z_brc(bcondNotHigh,target(L)); }
-inline void Assembler::z_brnl( Label& L) { z_brc(bcondNotLow,target(L)); }
-inline void Assembler::z_brne( Label& L) { z_brc(bcondNotEqual,target(L)); }
-inline void Assembler::z_brz( Label& L) { z_brc(bcondZero,target(L)); }
-inline void Assembler::z_brnz( Label& L) { z_brc(bcondNotZero,target(L)); }
-inline void Assembler::z_braz( Label& L) { z_brc(bcondAllZero,target(L)); }
-inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero,target(L)); }
+inline void Assembler::z_bru( Label& L) { z_brc(bcondAlways, target(L)); }
+inline void Assembler::z_brul( Label& L) { z_brcl(bcondAlways, target(L)); }
+inline void Assembler::z_brul( address a) { z_brcl(bcondAlways,a ); }
+inline void Assembler::z_brh( Label& L) { z_brc(bcondHigh, target(L)); }
+inline void Assembler::z_brl( Label& L) { z_brc(bcondLow, target(L)); }
+inline void Assembler::z_bre( Label& L) { z_brc(bcondEqual, target(L)); }
+inline void Assembler::z_brnh( Label& L) { z_brc(bcondNotHigh, target(L)); }
+inline void Assembler::z_brnl( Label& L) { z_brc(bcondNotLow, target(L)); }
+inline void Assembler::z_brne( Label& L) { z_brc(bcondNotEqual, target(L)); }
+inline void Assembler::z_brz( Label& L) { z_brc(bcondZero, target(L)); }
+inline void Assembler::z_brnz( Label& L) { z_brc(bcondNotZero, target(L)); }
+inline void Assembler::z_braz( Label& L) { z_brc(bcondAllZero, target(L)); }
+inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero, target(L)); }
inline void Assembler::z_brnp( Label& L) { z_brc( bcondNotPositive, target( L)); }
-inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne,target(L)); }
-inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero,target(L)); }
-inline void Assembler::z_brno( Label& L) { z_brc(bcondNotOrdered,target(L)); }
+inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne, target(L)); }
+inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero, target(L)); }
+inline void Assembler::z_bvat( Label& L) { z_brc(bcondVAlltrue, target(L)); }
+inline void Assembler::z_bvnt( Label& L) { z_brc((Assembler::branch_condition)(bcondVMixed | bcondVAllfalse), target(L)); }
+inline void Assembler::z_bvmix( Label& L) { z_brc(bcondVMixed, target(L)); }
+inline void Assembler::z_bvaf( Label& L) { z_brc(bcondVAllfalse, target(L)); }
+inline void Assembler::z_bvnf( Label& L) { z_brc((Assembler::branch_condition)(bcondVMixed | bcondVAlltrue), target(L)); }
+inline void Assembler::z_brno( Label& L) { z_brc(bcondNotOrdered, target(L)); }
inline void Assembler::z_brc( branch_condition m, Label& L) { z_brc(m, target(L)); }
inline void Assembler::z_brcl(branch_condition m, Label& L) { z_brcl(m, target(L)); }
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp Wed Dec 13 21:41:50 2017 -0800
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp Thu Dec 14 12:02:16 2017 +0100
@@ -4914,13 +4914,14 @@
// The result is the number of characters copied before the first incompatible character was found.
// If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
// by a few bytes. The result always indicates the number of copied characters.
+// When used as a character index, the returned value points to the first incompatible character.
//
// Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
// - Different number of characters may have been written to dead array (if precise is false).
// - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
Register tmp, bool precise) {
- assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);
+ assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);
if (precise) {
BLOCK_COMMENT("encode_iso_array {");
@@ -5027,7 +5028,7 @@
z_vo(Vtmp1, Vtmp1, Vtmp2);
z_vn(Vtmp1, Vtmp1, Vmask);
z_vceqhs(Vtmp1, Vtmp1, Vzero); // high half of all chars must be zero for successful compress.
- z_brne(VectorBreak); // break vector loop, incompatible character found.
+ z_bvnt(VectorBreak); // break vector loop if not all vector elements compare eq -> incompatible character found.
// re-process data from current iteration in break handler.
//---< pack & store characters >---
@@ -5094,24 +5095,28 @@
z_tmll(Rcnt, min_cnt-1);
z_brnaz(ScalarShortcut); // if all bits zero, there is nothing left to do for scalar loop.
// Rix == 0 in all cases.
+ z_sllg(Z_R1, Rcnt, 1); // # src bytes already processed. Only lower 32 bits are valid!
+ // Z_R1 contents must be treated as unsigned operand! For huge strings,
+ // (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
z_lgfr(result, Rcnt); // all characters processed.
- z_sgfr(Rdst, Rcnt); // restore ptr
- z_sgfr(Rsrc, Rcnt); // restore ptr, double the element count for Rsrc restore
- z_sgfr(Rsrc, Rcnt);
+ z_slgfr(Rdst, Rcnt); // restore ptr
+ z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
z_bru(AllDone);
bind(UnrolledBreak);
z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop
z_nilf(Z_R0, ~(min_cnt-1));
- z_sll(Rix, log_min_cnt); // # chars processed so far in UnrolledLoop, excl. current iteration.
- z_sr(Z_R0, Rix); // correct # chars processed in total.
+ z_sll(Rix, log_min_cnt); // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
+ z_sr(Z_R0, Rix); // fix # chars processed OK so far.
if (!precise) {
z_lgfr(result, Z_R0);
+ z_sllg(Z_R1, Z_R0, 1); // # src bytes already processed. Only lower 32 bits are valid!
+ // Z_R1 contents must be treated as unsigned operand! For huge strings,
+ // (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
z_aghi(result, min_cnt/2); // min_cnt/2 characters have already been written
// but ptrs were not updated yet.
- z_sgfr(Rdst, Z_R0); // restore ptr
- z_sgfr(Rsrc, Z_R0); // restore ptr, double the element count for Rsrc restore
- z_sgfr(Rsrc, Z_R0);
+ z_slgfr(Rdst, Z_R0); // restore ptr
+ z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
z_bru(AllDone);
}
bind(UnrolledDone);
@@ -5165,7 +5170,7 @@
z_sr(Rix, Z_R0);
}
z_lgfr(result, Rcnt); // # processed characters (if all runs ok).
- z_brz(ScalarDone);
+ z_brz(ScalarDone); // uses CC from Rix calculation
bind(ScalarLoop);
z_llh(Z_R1, 0, Z_R0, Rsrc);