8146613: PPC64: C2 does no longer respect int to long conversion for stub calls
authormdoerr
Fri, 08 Jan 2016 10:14:02 +0100
changeset 35541 cba047a086d5
parent 35540 e001ad24dcdb
child 35542 9dccb7f9f656
child 35560 b3fa0a291684
8146613: PPC64: C2 does no longer respect int to long conversion for stub calls Reviewed-by: kvn, goetz
hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp
hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp
hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Fri Jan 08 21:06:50 2016 -0800
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Fri Jan 08 10:14:02 2016 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2016 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3656,9 +3656,9 @@
   assert_different_registers(table, tc0, tc1, tc2);
   assert(table == tc3, "must be!");
 
-  if (ix0 != 0) addi(tc0, table, ix0);
-  if (ix1 != 0) addi(tc1, table, ix1);
-  if (ix2 != 0) addi(tc2, table, ix2);
+  addi(tc0, table, ix0);
+  addi(tc1, table, ix1);
+  addi(tc2, table, ix2);
   if (ix3 != 0) addi(tc3, table, ix3);
 
   return ix3;
@@ -3724,14 +3724,14 @@
   const int mainLoop_alignment = loopAlignment ? 32 : 4; // (InputForNewCode > 4 ? InputForNewCode : 32) : 4;
 
   // Process all bytes in a single-byte loop.
-  cmpdi(CCR0, len, 0);                           // Anything to do?
-  mtctr(len);
+  clrldi_(len, len, 32);                         // Enforce 32 bit. Anything to do?
   beq(CCR0, L_done);
 
   if (invertCRC) {
     nand(crc, crc, crc);                         // ~c
   }
 
+  mtctr(len);
   align(mainLoop_alignment);
   BIND(L_mainLoop);
     lbz(data, 0, buf);                           // Byte from buffer, zero-extended.
@@ -3947,7 +3947,7 @@
 #else
   Register crc_rv = tmp;                         // Load_reverse needs separate registers to work on.
                                                  // Occupies tmp, but frees up crc.
-  load_reverse_32(crc_rv, crc);                  // evert byte order because we are dealing with big-endian data.
+  load_reverse_32(crc_rv, crc);                  // Revert byte order because we are dealing with big-endian data.
   tmp = crc;
 #endif
 
--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Fri Jan 08 21:06:50 2016 -0800
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Fri Jan 08 10:14:02 2016 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2016 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3437,9 +3437,9 @@
 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                         jint len, jlong inv,
                                         jint *m_ints) {
+  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
   int longwords = len/2;
-  assert(longwords > 0, "unsupported");
 
   // Make very sure we don't use so much space that the stack might
   // overflow. 512 jints corresponds to an 16384-bit integer and
@@ -3467,9 +3467,9 @@
 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                       jint len, jlong inv,
                                       jint *m_ints) {
+  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
   assert(len % 2 == 0, "array length in montgomery_square must be even");
   int longwords = len/2;
-  assert(longwords > 0, "unsupported");
 
   // Make very sure we don't use so much space that the stack might
   // overflow. 512 jints corresponds to an 16384-bit integer and
--- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Fri Jan 08 21:06:50 2016 -0800
+++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Fri Jan 08 10:14:02 2016 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2016 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1077,6 +1077,12 @@
     return start;
   }
 
+  inline void assert_positive_int(Register count) {
+#ifdef ASSERT
+    __ srdi_(R0, count, 31);
+    __ asm_assert_eq("missing zero extend", 0xAFFE);
+#endif
+  }
 
   // Generate overlap test for array copy stubs.
   //
@@ -1089,10 +1095,7 @@
     Register tmp1 = R6_ARG4;
     Register tmp2 = R7_ARG5;
 
-#ifdef ASSERT
-    __ srdi_(tmp2, R5_ARG3, 31);
-    __ asm_assert_eq("missing zero extend", 0xAFFE);
-#endif
+    assert_positive_int(R5_ARG3);
 
     __ subf(tmp1, R3_ARG1, R4_ARG2); // distance in bytes
     __ sldi(tmp2, R5_ARG3, log2_elem_size); // size in bytes
@@ -1132,14 +1135,15 @@
   address generate_disjoint_byte_copy(bool aligned, const char * name) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
+    assert_positive_int(R5_ARG3);
 
     Register tmp1 = R6_ARG4;
     Register tmp2 = R7_ARG5;
     Register tmp3 = R8_ARG6;
     Register tmp4 = R9_ARG7;
 
-
     Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;
+
     // Don't try anything fancy if arrays don't have many elements.
     __ li(tmp3, 0);
     __ cmpwi(CCR0, R5_ARG3, 17);
@@ -1264,6 +1268,7 @@
   address generate_conjoint_byte_copy(bool aligned, const char * name) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
+    assert_positive_int(R5_ARG3);
 
     Register tmp1 = R6_ARG4;
     Register tmp2 = R7_ARG5;
@@ -1356,8 +1361,10 @@
     Register tmp4 = R9_ARG7;
 
     address start = __ function_entry();
+    assert_positive_int(R5_ARG3);
 
       Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
+
     // don't try anything fancy if arrays don't have many elements
     __ li(tmp3, 0);
     __ cmpwi(CCR0, R5_ARG3, 9);
@@ -1486,6 +1493,7 @@
   address generate_conjoint_short_copy(bool aligned, const char * name) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
+    assert_positive_int(R5_ARG3);
 
     Register tmp1 = R6_ARG4;
     Register tmp2 = R7_ARG5;
@@ -1528,6 +1536,7 @@
     Register tmp4 = R0;
 
     Label l_1, l_2, l_3, l_4, l_5, l_6;
+
     // for short arrays, just do single element copy
     __ li(tmp3, 0);
     __ cmpwi(CCR0, R5_ARG3, 5);
@@ -1610,6 +1619,7 @@
   address generate_disjoint_int_copy(bool aligned, const char * name) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
+    assert_positive_int(R5_ARG3);
     generate_disjoint_int_copy_core(aligned);
     __ li(R3_RET, 0); // return 0
     __ blr();
@@ -1695,7 +1705,7 @@
   address generate_conjoint_int_copy(bool aligned, const char * name) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
-
+    assert_positive_int(R5_ARG3);
     address nooverlap_target = aligned ?
       STUB_ENTRY(arrayof_jint_disjoint_arraycopy) :
       STUB_ENTRY(jint_disjoint_arraycopy);
@@ -1782,6 +1792,7 @@
   address generate_disjoint_long_copy(bool aligned, const char * name) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
+    assert_positive_int(R5_ARG3);
     generate_disjoint_long_copy_core(aligned);
     __ li(R3_RET, 0); // return 0
     __ blr();
@@ -1865,7 +1876,7 @@
   address generate_conjoint_long_copy(bool aligned, const char * name) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
-
+    assert_positive_int(R5_ARG3);
     address nooverlap_target = aligned ?
       STUB_ENTRY(arrayof_jlong_disjoint_arraycopy) :
       STUB_ENTRY(jlong_disjoint_arraycopy);
@@ -1892,7 +1903,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
 
     address start = __ function_entry();
-
+    assert_positive_int(R5_ARG3);
     address nooverlap_target = aligned ?
       STUB_ENTRY(arrayof_oop_disjoint_arraycopy) :
       STUB_ENTRY(oop_disjoint_arraycopy);
@@ -1929,7 +1940,7 @@
   address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
-
+    assert_positive_int(R5_ARG3);
     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
 
     // save some arguments, disjoint_long_copy_core destroys them.
@@ -2003,7 +2014,24 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
 
-    // TODO: Assert that int is 64 bit sign extended and arrays are not conjoint.
+    // Assert that int is 64 bit sign extended and arrays are not conjoint.
+#ifdef ASSERT
+    {
+    assert_positive_int(R5_ARG3);
+    const Register tmp1 = R11_scratch1, tmp2 = R12_scratch2;
+    Label no_overlap;
+    __ subf(tmp1, R3_ARG1, R4_ARG2); // distance in bytes
+    __ sldi(tmp2, R5_ARG3, LogBytesPerHeapOop); // size in bytes
+    __ cmpld(CCR0, R3_ARG1, R4_ARG2); // Use unsigned comparison!
+    __ cmpld(CCR1, tmp1, tmp2);
+    __ crnand(CCR0, Assembler::less, CCR1, Assembler::less);
+    // Overlaps if Src before dst and distance smaller than size.
+    // Branch to forward copy routine otherwise.
+    __ blt(CCR0, no_overlap);
+    __ stop("overlap in checkcast_copy", 0x9543);
+    __ bind(no_overlap);
+    }
+#endif
 
     gen_write_ref_array_pre_barrier(R3_from, R4_to, R5_count, dest_uninitialized, R12_tmp, /* preserve: */ R6_ckoff, R7_ckval);
 
@@ -2452,12 +2480,14 @@
                                                              STUB_ENTRY(checkcast_arraycopy));
 
     // fill routines
-    StubRoutines::_jbyte_fill          = generate_fill(T_BYTE,  false, "jbyte_fill");
-    StubRoutines::_jshort_fill         = generate_fill(T_SHORT, false, "jshort_fill");
-    StubRoutines::_jint_fill           = generate_fill(T_INT,   false, "jint_fill");
-    StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
-    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
-    StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");
+    if (OptimizeFill) {
+      StubRoutines::_jbyte_fill          = generate_fill(T_BYTE,  false, "jbyte_fill");
+      StubRoutines::_jshort_fill         = generate_fill(T_SHORT, false, "jshort_fill");
+      StubRoutines::_jint_fill           = generate_fill(T_INT,   false, "jint_fill");
+      StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
+      StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+      StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");
+    }
   }
 
   // Safefetch stubs.
@@ -2542,6 +2572,11 @@
 
     BLOCK_COMMENT("Entry:");
 
+    // C2 does not respect int to long conversion for stub calls.
+    __ clrldi(xlen, xlen, 32);
+    __ clrldi(ylen, ylen, 32);
+    __ clrldi(zlen, zlen, 32);
+
     // Save non-volatile regs (frameless).
     int current_offs = 8;
     __ std(R24, -current_offs, R1_SP); current_offs += 8;