8193443: [s390]: EncodeISOArray generates wrong vector code
authorlucy
Thu, 14 Dec 2017 12:02:16 +0100
changeset 48331 a8e39cc7b88f
parent 48330 e8230b52a8f4
child 48332 651a95f30dfb
8193443: [s390]: EncodeISOArray generates wrong vector code Reviewed-by: goetz, mdoerr
src/hotspot/cpu/s390/assembler_s390.hpp
src/hotspot/cpu/s390/assembler_s390.inline.hpp
src/hotspot/cpu/s390/macroAssembler_s390.cpp
--- a/src/hotspot/cpu/s390/assembler_s390.hpp	Wed Dec 13 21:41:50 2017 -0800
+++ b/src/hotspot/cpu/s390/assembler_s390.hpp	Thu Dec 14 12:02:16 2017 +0100
@@ -1459,6 +1459,10 @@
     bcondLogNotZero          =  bcondLogNotZero_Carry | bcondLogNotZero_NoCarry,
     bcondLogCarry            =  bcondLogZero_Carry | bcondLogNotZero_Carry,
     bcondLogBorrow           =  /* bcondLogZero_Borrow | */ bcondLogNotZero_Borrow,
+    // Vector compare instructions
+    bcondVAlltrue    =  8,  // All  vector elements evaluate true
+    bcondVMixed      =  4,  // Some vector elements evaluate true, some false
+    bcondVAllfalse   =  1,  // All  vector elements evaluate false
     // string search instructions
     bcondFound       =  4,
     bcondNotFound    =  2,
@@ -3022,6 +3026,12 @@
   inline void z_btrue( Label& L);
   inline void z_bfalse(Label& L);
 
+  inline void z_bvat(Label& L);   // all true
+  inline void z_bvnt(Label& L);   // not all true (mixed or all false)
+  inline void z_bvmix(Label& L);  // mixed true and false
+  inline void z_bvaf(Label& L);   // not all false (mixed or all true)
+  inline void z_bvnf(Label& L);   // all false
+
   inline void z_brno( Label& L);
 
 
--- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp	Wed Dec 13 21:41:50 2017 -0800
+++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp	Thu Dec 14 12:02:16 2017 +0100
@@ -1315,23 +1315,28 @@
 
 inline void Assembler::z_exrl(Register r1, Label& L) { z_exrl(r1, target(L)); }  // z10
 inline void Assembler::z_larl(Register r1, Label& L) { z_larl(r1, target(L)); }
-inline void Assembler::z_bru(   Label& L) { z_brc(bcondAlways,target(L)); }
-inline void Assembler::z_brul(  Label& L) { z_brcl(bcondAlways,target(L)); }
-inline void Assembler::z_brul( address a) { z_brcl(bcondAlways,a); }
-inline void Assembler::z_brh(   Label& L) { z_brc(bcondHigh,target(L)); }
-inline void Assembler::z_brl(   Label& L) { z_brc(bcondLow,target(L)); }
-inline void Assembler::z_bre(   Label& L) { z_brc(bcondEqual,target(L)); }
-inline void Assembler::z_brnh(  Label& L) { z_brc(bcondNotHigh,target(L)); }
-inline void Assembler::z_brnl(  Label& L) { z_brc(bcondNotLow,target(L)); }
-inline void Assembler::z_brne(  Label& L) { z_brc(bcondNotEqual,target(L)); }
-inline void Assembler::z_brz(   Label& L) { z_brc(bcondZero,target(L)); }
-inline void Assembler::z_brnz(  Label& L) { z_brc(bcondNotZero,target(L)); }
-inline void Assembler::z_braz(  Label& L) { z_brc(bcondAllZero,target(L)); }
-inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero,target(L)); }
+inline void Assembler::z_bru(   Label& L) { z_brc(bcondAlways, target(L)); }
+inline void Assembler::z_brul(  Label& L) { z_brcl(bcondAlways, target(L)); }
+inline void Assembler::z_brul( address a) { z_brcl(bcondAlways,a ); }
+inline void Assembler::z_brh(   Label& L) { z_brc(bcondHigh, target(L)); }
+inline void Assembler::z_brl(   Label& L) { z_brc(bcondLow, target(L)); }
+inline void Assembler::z_bre(   Label& L) { z_brc(bcondEqual, target(L)); }
+inline void Assembler::z_brnh(  Label& L) { z_brc(bcondNotHigh, target(L)); }
+inline void Assembler::z_brnl(  Label& L) { z_brc(bcondNotLow, target(L)); }
+inline void Assembler::z_brne(  Label& L) { z_brc(bcondNotEqual, target(L)); }
+inline void Assembler::z_brz(   Label& L) { z_brc(bcondZero, target(L)); }
+inline void Assembler::z_brnz(  Label& L) { z_brc(bcondNotZero, target(L)); }
+inline void Assembler::z_braz(  Label& L) { z_brc(bcondAllZero, target(L)); }
+inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero, target(L)); }
 inline void Assembler::z_brnp(  Label& L) { z_brc( bcondNotPositive, target( L)); }
-inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne,target(L)); }
-inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero,target(L)); }
-inline void Assembler::z_brno(  Label& L) { z_brc(bcondNotOrdered,target(L)); }
+inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne, target(L)); }
+inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero, target(L)); }
+inline void Assembler::z_bvat(  Label& L) { z_brc(bcondVAlltrue, target(L)); }
+inline void Assembler::z_bvnt(  Label& L) { z_brc((Assembler::branch_condition)(bcondVMixed | bcondVAllfalse), target(L)); }
+inline void Assembler::z_bvmix( Label& L) { z_brc(bcondVMixed, target(L)); }
+inline void Assembler::z_bvaf(  Label& L) { z_brc(bcondVAllfalse, target(L)); }
+inline void Assembler::z_bvnf(  Label& L) { z_brc((Assembler::branch_condition)(bcondVMixed | bcondVAlltrue), target(L)); }
+inline void Assembler::z_brno(  Label& L) { z_brc(bcondNotOrdered, target(L)); }
 inline void Assembler::z_brc( branch_condition m, Label& L) { z_brc(m, target(L)); }
 inline void Assembler::z_brcl(branch_condition m, Label& L) { z_brcl(m, target(L)); }
 
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Wed Dec 13 21:41:50 2017 -0800
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Thu Dec 14 12:02:16 2017 +0100
@@ -4914,13 +4914,14 @@
 // The result is the number of characters copied before the first incompatible character was found.
 // If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
 // by a few bytes. The result always indicates the number of copied characters.
+// When used as a character index, the returned value points to the first incompatible character.
 //
 // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
 // - Different number of characters may have been written to dead array (if precise is false).
 // - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
 unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
                                              Register tmp,    bool precise) {
-  assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);
+  assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);
 
   if (precise) {
     BLOCK_COMMENT("encode_iso_array {");
@@ -5027,7 +5028,7 @@
       z_vo(Vtmp1, Vtmp1, Vtmp2);
       z_vn(Vtmp1, Vtmp1, Vmask);
       z_vceqhs(Vtmp1, Vtmp1, Vzero);       // high half of all chars must be zero for successful compress.
-      z_brne(VectorBreak);                 // break vector loop, incompatible character found.
+      z_bvnt(VectorBreak);                 // break vector loop if not all vector elements compare eq -> incompatible character found.
                                            // re-process data from current iteration in break handler.
 
       //---<  pack & store characters  >---
@@ -5094,24 +5095,28 @@
     z_tmll(Rcnt, min_cnt-1);
     z_brnaz(ScalarShortcut);               // if all bits zero, there is nothing left to do for scalar loop.
                                            // Rix == 0 in all cases.
+    z_sllg(Z_R1, Rcnt, 1);                 // # src bytes already processed. Only lower 32 bits are valid!
+                                           //   Z_R1 contents must be treated as unsigned operand! For huge strings,
+                                           //   (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
     z_lgfr(result, Rcnt);                  // all characters processed.
-    z_sgfr(Rdst, Rcnt);                    // restore ptr
-    z_sgfr(Rsrc, Rcnt);                    // restore ptr, double the element count for Rsrc restore
-    z_sgfr(Rsrc, Rcnt);
+    z_slgfr(Rdst, Rcnt);                   // restore ptr
+    z_slgfr(Rsrc, Z_R1);                   // restore ptr, double the element count for Rsrc restore
     z_bru(AllDone);
 
     bind(UnrolledBreak);
     z_lgfr(Z_R0, Rcnt);                    // # chars processed in total after unrolled loop
     z_nilf(Z_R0, ~(min_cnt-1));
-    z_sll(Rix, log_min_cnt);               // # chars processed so far in UnrolledLoop, excl. current iteration.
-    z_sr(Z_R0, Rix);                       // correct # chars processed in total.
+    z_sll(Rix, log_min_cnt);               // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
+    z_sr(Z_R0, Rix);                       // fix # chars processed OK so far.
     if (!precise) {
       z_lgfr(result, Z_R0);
+      z_sllg(Z_R1, Z_R0, 1);               // # src bytes already processed. Only lower 32 bits are valid!
+                                           //   Z_R1 contents must be treated as unsigned operand! For huge strings,
+                                           //   (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
       z_aghi(result, min_cnt/2);           // min_cnt/2 characters have already been written
                                            // but ptrs were not updated yet.
-      z_sgfr(Rdst, Z_R0);                  // restore ptr
-      z_sgfr(Rsrc, Z_R0);                  // restore ptr, double the element count for Rsrc restore
-      z_sgfr(Rsrc, Z_R0);
+      z_slgfr(Rdst, Z_R0);                 // restore ptr
+      z_slgfr(Rsrc, Z_R1);                 // restore ptr, double the element count for Rsrc restore
       z_bru(AllDone);
     }
     bind(UnrolledDone);
@@ -5165,7 +5170,7 @@
       z_sr(Rix, Z_R0);
     }
     z_lgfr(result, Rcnt);                  // # processed characters (if all runs ok).
-    z_brz(ScalarDone);
+    z_brz(ScalarDone);                     // uses CC from Rix calculation
 
     bind(ScalarLoop);
       z_llh(Z_R1, 0, Z_R0, Rsrc);