Merge
authorlana
Thu, 30 Jul 2015 15:23:25 -0700
changeset 31869 e6eb1f2fd18c
parent 31847 573eadb22d9b (current diff)
parent 31868 52b21e0e85f9 (diff)
child 31870 7320ca75cae7
Merge
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Thu Jul 30 15:23:25 2015 -0700
@@ -14101,7 +14101,7 @@
 instruct vsub2F(vecD dst, vecD src1, vecD src2)
 %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (AddVF src1 src2));
+  match(Set dst (SubVF src1 src2));
   ins_cost(INSN_COST);
   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
   ins_encode %{
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -261,7 +261,7 @@
 
 // LoadStoreExclusiveOp
     __ stxr(r20, r21, r2);                             //       stxr    w20, x21, [x2]
-    __ stlxr(r7, r29, r7);                             //       stlxr   w7, x29, [x7]
+    __ stlxr(r5, r29, r7);                             //       stlxr   w5, x29, [x7]
     __ ldxr(r5, r16);                                  //       ldxr    x5, [x16]
     __ ldaxr(r27, r29);                                //       ldaxr   x27, [x29]
     __ stlr(r0, r29);                                  //       stlr    x0, [x29]
@@ -295,7 +295,7 @@
     __ ldxp(r8, r2, r19);                              //       ldxp    x8, x2, [x19]
     __ ldaxp(r7, r19, r14);                            //       ldaxp   x7, x19, [x14]
     __ stxp(r8, r27, r28, r5);                         //       stxp    w8, x27, x28, [x5]
-    __ stlxp(r6, r8, r14, r6);                         //       stlxp   w6, x8, x14, [x6]
+    __ stlxp(r5, r8, r14, r6);                         //       stlxp   w5, x8, x14, [x6]
 
 // LoadStoreExclusiveOp
     __ ldxpw(r25, r4, r22);                            //       ldxp    w25, w4, [x22]
@@ -768,7 +768,7 @@
  24c:   d61f0040        br      x2
  250:   d63f00a0        blr     x5
  254:   c8147c55        stxr    w20, x21, [x2]
- 258:   c807fcfd        stlxr   w7, x29, [x7]
+ 258:   c805fcfd        stlxr   w5, x29, [x7]
  25c:   c85f7e05        ldxr    x5, [x16]
  260:   c85fffbb        ldaxr   x27, [x29]
  264:   c89fffa0        stlr    x0, [x29]
@@ -794,7 +794,7 @@
  2b4:   c87f0a68        ldxp    x8, x2, [x19]
  2b8:   c87fcdc7        ldaxp   x7, x19, [x14]
  2bc:   c82870bb        stxp    w8, x27, x28, [x5]
- 2c0:   c826b8c8        stlxp   w6, x8, x14, [x6]
+ 2c0:   c825b8c8        stlxp   w5, x8, x14, [x6]
  2c4:   887f12d9        ldxp    w25, w4, [x22]
  2c8:   887fb9ee        ldaxp   w14, w14, [x15]
  2cc:   8834215a        stxp    w20, w26, w8, [x10]
@@ -1084,14 +1084,14 @@
     0xd4063721,     0xd4035082,     0xd400bfe3,     0xd4282fc0,
     0xd444c320,     0xd503201f,     0xd69f03e0,     0xd6bf03e0,
     0xd5033fdf,     0xd5033f9f,     0xd5033abf,     0xd61f0040,
-    0xd63f00a0,     0xc8147c55,     0xc807fcfd,     0xc85f7e05,
+    0xd63f00a0,     0xc8147c55,     0xc805fcfd,     0xc85f7e05,
     0xc85fffbb,     0xc89fffa0,     0xc8dfff95,     0x88187cf8,
     0x8815ff9a,     0x885f7cd5,     0x885fffcf,     0x889ffc73,
     0x88dffc56,     0x48127c0f,     0x480bff85,     0x485f7cdd,
     0x485ffcf2,     0x489fff99,     0x48dffe62,     0x080a7c3e,
     0x0814fed5,     0x085f7c59,     0x085ffcb8,     0x089ffc70,
     0x08dfffb6,     0xc87f0a68,     0xc87fcdc7,     0xc82870bb,
-    0xc826b8c8,     0x887f12d9,     0x887fb9ee,     0x8834215a,
+    0xc825b8c8,     0x887f12d9,     0x887fb9ee,     0x8834215a,
     0x8837ca52,     0xf806317e,     0xb81b3337,     0x39000dc2,
     0x78005149,     0xf84391f4,     0xb85b220c,     0x385fd356,
     0x785d127e,     0x389f4149,     0x79801e3c,     0x79c014a3,
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1106,11 +1106,13 @@
 
 #define INSN4(NAME, sz, op, o0) /* Four registers */                    \
   void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
+    assert(Rs != Rn, "unpredictable instruction");                  \
     load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);                 \
   }
 
 #define INSN3(NAME, sz, op, o0) /* Three registers */                   \
   void NAME(Register Rs, Register Rt, Register Rn) {                    \
+    assert(Rs != Rn, "unpredictable instruction");                  \
     load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);    \
   }
 
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -33,6 +33,7 @@
 #include "compiler/disassembler.hpp"
 #include "memory/resourceArea.hpp"
 #include "nativeInst_aarch64.hpp"
+#include "oops/klass.inline.hpp"
 #include "opto/compile.hpp"
 #include "opto/node.hpp"
 #include "runtime/biasedLocking.hpp"
@@ -3791,8 +3792,8 @@
     br(Assembler::HI, slow_case);
 
     // If heap_top hasn't been changed by some other thread, update it.
-    stlxr(rscratch1, end, rscratch1);
-    cbnzw(rscratch1, retry);
+    stlxr(rscratch2, end, rscratch1);
+    cbnzw(rscratch2, retry);
   }
 }
 
--- a/hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,7 @@
   address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
   address generate_Reference_get_entry(void);
 
-  // Not supported
-  address generate_CRC32_update_entry() { return NULL; }
-  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
+  address generate_CRC32_update_entry();
+  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
+
 #endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -50,6 +50,7 @@
 #else
 #define BLOCK_COMMENT(str) block_comment(str)
 #endif
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 
 #ifdef ASSERT
 // On RISC, there's no benefit to verifying instruction boundaries.
@@ -3433,6 +3434,418 @@
   bind(Ldone_false);
 }
 
+// Helpers for Intrinsic Emitters
+//
+// Revert the byte order of a 32bit value in a register
+//   src: 0x44556677
+//   dst: 0x77665544
+// Three steps to obtain the result:
+//  1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word
+//     into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared.
+//     This value initializes dst.
+//  2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost
+//     byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go.
+//     This value is mask inserted into dst with a [0..23] mask of 1s.
+//  3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position.
+//     This value is mask inserted into dst with a [8..15] mask of 1s.
+void MacroAssembler::load_reverse_32(Register dst, Register src) {
+  assert_different_registers(dst, src);
+
+  rldicl(dst, src, (4+1)*8, 56);       // Rotate byte 4 into position 7 (rightmost), clear all to the left.
+  rlwimi(dst, src,     3*8,  0, 23);   // Insert byte 5 into position 6, 7 into 4, leave pos 7 alone.
+  rlwimi(dst, src,     1*8,  8, 15);   // Insert byte 6 into position 5, leave the rest alone.
+}
+
+// Calculate the column addresses of the crc32 lookup table into distinct registers.
+// This loop-invariant calculation is moved out of the loop body, reducing the loop
+// body size from 20 to 16 instructions.
+// Returns the offset that was used to calculate the address of column tc3.
+// Due to register shortage, setting tc3 may overwrite table. With the return offset
+// at hand, the original table address can be easily reconstructed.
+int MacroAssembler::crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3) {
+
+#ifdef VM_LITTLE_ENDIAN
+  // This is what we implement (the DOLIT4 part):
+  // ========================================================================= */
+  // #define DOLIT4 c ^= *buf4++; \
+  //         c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
+  //             crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
+  // #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+  // ========================================================================= */
+  const int ix0 = 3*(4*CRC32_COLUMN_SIZE);
+  const int ix1 = 2*(4*CRC32_COLUMN_SIZE);
+  const int ix2 = 1*(4*CRC32_COLUMN_SIZE);
+  const int ix3 = 0*(4*CRC32_COLUMN_SIZE);
+#else
+  // This is what we implement (the DOBIG4 part):
+  // =========================================================================
+  // #define DOBIG4 c ^= *++buf4; \
+  //         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+  //             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+  // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+  // =========================================================================
+  const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
+  const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
+  const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
+  const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
+#endif
+  assert_different_registers(table, tc0, tc1, tc2);
+  assert(table == tc3, "must be!");
+
+  if (ix0 != 0) addi(tc0, table, ix0);
+  if (ix1 != 0) addi(tc1, table, ix1);
+  if (ix2 != 0) addi(tc2, table, ix2);
+  if (ix3 != 0) addi(tc3, table, ix3);
+
+  return ix3;
+}
+
+/**
+ * uint32_t crc;
+ * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
+ */
+void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
+  assert_different_registers(crc, table, tmp);
+  assert_different_registers(val, table);
+
+  if (crc == val) {                   // Must rotate first to use the unmodified value.
+    rlwinm(tmp, val, 2, 24-2, 31-2);  // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
+                                      // As we use a word (4-byte) instruction, we have to adapt the mask bit positions.
+    srwi(crc, crc, 8);                // Unsigned shift, clear leftmost 8 bits.
+  } else {
+    srwi(crc, crc, 8);                // Unsigned shift, clear leftmost 8 bits.
+    rlwinm(tmp, val, 2, 24-2, 31-2);  // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
+  }
+  lwzx(tmp, table, tmp);
+  xorr(crc, crc, tmp);
+}
+
+/**
+ * uint32_t crc;
+ * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
+ */
+void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
+  fold_byte_crc32(crc, crc, table, tmp);
+}
+
+/**
+ * Emits code to update CRC-32 with a byte value according to constants in table.
+ *
+ * @param [in,out]crc   Register containing the crc.
+ * @param [in]val       Register containing the byte to fold into the CRC.
+ * @param [in]table     Register containing the table of crc constants.
+ *
+ * uint32_t crc;
+ * val = crc_table[(val ^ crc) & 0xFF];
+ * crc = val ^ (crc >> 8);
+ */
+void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
+  BLOCK_COMMENT("update_byte_crc32:");
+  xorr(val, val, crc);
+  fold_byte_crc32(crc, val, table, val);
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ */
+void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
+                                           Register data, bool loopAlignment, bool invertCRC) {
+  assert_different_registers(crc, buf, len, table, data);
+
+  Label L_mainLoop, L_done;
+  const int mainLoop_stepping  = 1;
+  const int mainLoop_alignment = loopAlignment ? 32 : 4; // (InputForNewCode > 4 ? InputForNewCode : 32) : 4;
+
+  // Process all bytes in a single-byte loop.
+  cmpdi(CCR0, len, 0);                           // Anything to do?
+  mtctr(len);
+  beq(CCR0, L_done);
+
+  if (invertCRC) {
+    nand(crc, crc, crc);                         // ~c
+  }
+
+  align(mainLoop_alignment);
+  BIND(L_mainLoop);
+    lbz(data, 0, buf);                           // Byte from buffer, zero-extended.
+    addi(buf, buf, mainLoop_stepping);           // Advance buffer position.
+    update_byte_crc32(crc, data, table);
+    bdnz(L_mainLoop);                            // Iterate.
+
+  if (invertCRC) {
+    nand(crc, crc, crc);                         // ~c
+  }
+
+  bind(L_done);
+}
+
+/**
+ * Emits code to update CRC-32 with a 4-byte value according to constants in table
+ * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c
+ */
+// A not on the lookup table address(es):
+// The lookup table consists of two sets of four columns each.
+// The columns {0..3} are used for little-endian machines.
+// The columns {4..7} are used for big-endian machines.
+// To save the effort of adding the column offset to the table address each time
+// a table element is looked up, it is possible to pass the pre-calculated
+// column addresses.
+// Uses R9..R12 as work register. Must be saved/restored by caller, if necessary.
+void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
+                                        Register t0,  Register t1,  Register t2,  Register t3,
+                                        Register tc0, Register tc1, Register tc2, Register tc3) {
+  assert_different_registers(crc, t3);
+
+  // XOR crc with next four bytes of buffer.
+  lwz(t3, bufDisp, buf);
+  if (bufInc != 0) {
+    addi(buf, buf, bufInc);
+  }
+  xorr(t3, t3, crc);
+
+  // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices.
+  rlwinm(t0, t3,  2,         24-2, 31-2);  // ((t1 >>  0) & 0xff) << 2
+  rlwinm(t1, t3,  32+(2- 8), 24-2, 31-2);  // ((t1 >>  8) & 0xff) << 2
+  rlwinm(t2, t3,  32+(2-16), 24-2, 31-2);  // ((t1 >> 16) & 0xff) << 2
+  rlwinm(t3, t3,  32+(2-24), 24-2, 31-2);  // ((t1 >> 24) & 0xff) << 2
+
+  // Use the pre-calculated column addresses.
+  // Load pre-calculated table values.
+  lwzx(t0, tc0, t0);
+  lwzx(t1, tc1, t1);
+  lwzx(t2, tc2, t2);
+  lwzx(t3, tc3, t3);
+
+  // Calculate new crc from table values.
+  xorr(t0,  t0, t1);
+  xorr(t2,  t2, t3);
+  xorr(crc, t0, t2);  // Now crc contains the final checksum value.
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ *
+ * Uses R9..R12 as work register. Must be saved/restored by caller!
+ */
+void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
+                                        Register t0,  Register t1,  Register t2,  Register t3,
+                                        Register tc0, Register tc1, Register tc2, Register tc3) {
+  assert_different_registers(crc, buf, len, table);
+
+  Label L_mainLoop, L_tail;
+  Register  tmp  = t0;
+  Register  data = t0;
+  Register  tmp2 = t1;
+  const int mainLoop_stepping  = 8;
+  const int tailLoop_stepping  = 1;
+  const int log_stepping       = exact_log2(mainLoop_stepping);
+  const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32;
+  const int complexThreshold   = 2*mainLoop_stepping;
+
+  // Don't test for len <= 0 here. This pathological case should not occur anyway.
+  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
+  // The situation itself is detected and handled correctly by the conditional branches
+  // following  aghi(len, -stepping) and aghi(len, +stepping).
+  assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
+
+  BLOCK_COMMENT("kernel_crc32_2word {");
+
+  nand(crc, crc, crc);                           // ~c
+
+  // Check for short (<mainLoop_stepping) buffer.
+  cmpdi(CCR0, len, complexThreshold);
+  blt(CCR0, L_tail);
+
+  // Pre-mainLoop alignment did show a slight (1%) positive effect on performance.
+  // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions.
+  {
+    // Align buf addr to mainLoop_stepping boundary.
+    neg(tmp2, buf);                           // Calculate # preLoop iterations for alignment.
+    rldicl(tmp2, tmp2, 0, 64-log_stepping);   // Rotate tmp2 0 bits, insert into tmp2, anding with mask with 1s from 62..63.
+
+    if (complexThreshold > mainLoop_stepping) {
+      sub(len, len, tmp2);                       // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
+    } else {
+      sub(tmp, len, tmp2);                       // Remaining bytes for main loop.
+      cmpdi(CCR0, tmp, mainLoop_stepping);
+      blt(CCR0, L_tail);                         // For less than one mainloop_stepping left, do only tail processing
+      mr(len, tmp);                              // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
+    }
+    update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
+  }
+
+  srdi(tmp2, len, log_stepping);                 // #iterations for mainLoop
+  andi(len, len, mainLoop_stepping-1);           // remaining bytes for tailLoop
+  mtctr(tmp2);
+
+#ifdef VM_LITTLE_ENDIAN
+  Register crc_rv = crc;
+#else
+  Register crc_rv = tmp;                         // Load_reverse needs separate registers to work on.
+                                                 // Occupies tmp, but frees up crc.
+  load_reverse_32(crc_rv, crc);                  // Revert byte order because we are dealing with big-endian data.
+  tmp = crc;
+#endif
+
+  int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);
+
+  align(mainLoop_alignment);                     // Octoword-aligned loop address. Shows 2% improvement.
+  BIND(L_mainLoop);
+    update_1word_crc32(crc_rv, buf, table, 0, 0, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
+    update_1word_crc32(crc_rv, buf, table, 4, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
+    bdnz(L_mainLoop);
+
+#ifndef VM_LITTLE_ENDIAN
+  load_reverse_32(crc, crc_rv);                  // Revert byte order because we are dealing with big-endian data.
+  tmp = crc_rv;                                  // Tmp uses it's original register again.
+#endif
+
+  // Restore original table address for tailLoop.
+  if (reconstructTableOffset != 0) {
+    addi(table, table, -reconstructTableOffset);
+  }
+
+  // Process last few (<complexThreshold) bytes of buffer.
+  BIND(L_tail);
+  update_byteLoop_crc32(crc, buf, len, table, data, false, false);
+
+  nand(crc, crc, crc);                           // ~c
+  BLOCK_COMMENT("} kernel_crc32_2word");
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ *
+ * uses R9..R12 as work register. Must be saved/restored by caller!
+ */
+void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
+                                        Register t0,  Register t1,  Register t2,  Register t3,
+                                        Register tc0, Register tc1, Register tc2, Register tc3) {
+  assert_different_registers(crc, buf, len, table);
+
+  Label L_mainLoop, L_tail;
+  Register  tmp          = t0;
+  Register  data         = t0;
+  Register  tmp2         = t1;
+  const int mainLoop_stepping  = 4;
+  const int tailLoop_stepping  = 1;
+  const int log_stepping       = exact_log2(mainLoop_stepping);
+  const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32;
+  const int complexThreshold   = 2*mainLoop_stepping;
+
+  // Don't test for len <= 0 here. This pathological case should not occur anyway.
+  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
+  // The situation itself is detected and handled correctly by the conditional branches
+  // following  aghi(len, -stepping) and aghi(len, +stepping).
+  assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
+
+  BLOCK_COMMENT("kernel_crc32_1word {");
+
+  nand(crc, crc, crc);                           // ~c
+
+  // Check for short (<mainLoop_stepping) buffer.
+  cmpdi(CCR0, len, complexThreshold);
+  blt(CCR0, L_tail);
+
+  // Pre-mainLoop alignment did show a slight (1%) positive effect on performance.
+  // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions.
+  {
+    // Align buf addr to mainLoop_stepping boundary.
+    neg(tmp2, buf);                              // Calculate # preLoop iterations for alignment.
+    rldicl(tmp2, tmp2, 0, 64-log_stepping);      // Rotate tmp2 0 bits, insert into tmp2, anding with mask with 1s from 62..63.
+
+    if (complexThreshold > mainLoop_stepping) {
+      sub(len, len, tmp2);                       // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
+    } else {
+      sub(tmp, len, tmp2);                       // Remaining bytes for main loop.
+      cmpdi(CCR0, tmp, mainLoop_stepping);
+      blt(CCR0, L_tail);                         // For less than one mainloop_stepping left, do only tail processing
+      mr(len, tmp);                              // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
+    }
+    update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
+  }
+
+  srdi(tmp2, len, log_stepping);                 // #iterations for mainLoop
+  andi(len, len, mainLoop_stepping-1);           // remaining bytes for tailLoop
+  mtctr(tmp2);
+
+#ifdef VM_LITTLE_ENDIAN
+  Register crc_rv = crc;
+#else
+  Register crc_rv = tmp;                         // Load_reverse needs separate registers to work on.
+                                                 // Occupies tmp, but frees up crc.
+  load_reverse_32(crc_rv, crc);                  // evert byte order because we are dealing with big-endian data.
+  tmp = crc;
+#endif
+
+  int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);
+
+  align(mainLoop_alignment);                     // Octoword-aligned loop address. Shows 2% improvement.
+  BIND(L_mainLoop);
+    update_1word_crc32(crc_rv, buf, table, 0, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
+    bdnz(L_mainLoop);
+
+#ifndef VM_LITTLE_ENDIAN
+  load_reverse_32(crc, crc_rv);                  // Revert byte order because we are dealing with big-endian data.
+  tmp = crc_rv;                                  // Tmp uses it's original register again.
+#endif
+
+  // Restore original table address for tailLoop.
+  if (reconstructTableOffset != 0) {
+    addi(table, table, -reconstructTableOffset);
+  }
+
+  // Process last few (<complexThreshold) bytes of buffer.
+  BIND(L_tail);
+  update_byteLoop_crc32(crc, buf, len, table, data, false, false);
+
+  nand(crc, crc, crc);                           // ~c
+  BLOCK_COMMENT("} kernel_crc32_1word");
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ *
+ * Uses R7_ARG5, R8_ARG6 as work registers.
+ */
+void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
+                                        Register t0,  Register t1,  Register t2,  Register t3) {
+  assert_different_registers(crc, buf, len, table);
+
+  Register  data = t0;                   // Holds the current byte to be folded into crc.
+
+  BLOCK_COMMENT("kernel_crc32_1byte {");
+
+  // Process all bytes in a single-byte loop.
+  update_byteLoop_crc32(crc, buf, len, table, data, true, true);
+
+  BLOCK_COMMENT("} kernel_crc32_1byte");
+}
+
+void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
+  assert_different_registers(crc, buf, /* len,  not used!! */ table, tmp);
+
+  BLOCK_COMMENT("kernel_crc32_singleByte:");
+  nand(crc, crc, crc);       // ~c
+
+  lbz(tmp, 0, buf);          // Byte from buffer, zero-extended.
+  update_byte_crc32(crc, tmp, table);
+
+  nand(crc, crc, crc);       // ~c
+}
+
 // dest_lo += src1 + src2
 // dest_hi += carry1 + carry2
 void MacroAssembler::add2_with_carry(Register dest_hi,
@@ -3515,7 +3928,7 @@
   b(L_multiply);
 
 
-  bind( L_one_x ); // Load one 32 bit portion of x as (0,value).
+  bind(L_one_x); // Load one 32 bit portion of x as (0,value).
 
   lwz(x_xstart, 0, x);
   b(L_first_loop);
@@ -3534,7 +3947,7 @@
   //  z[kdx] = (jlong)product;
 
   sldi(tmp, idx, LogBytesPerInt);
-  if ( offset ) {
+  if (offset) {
     addi(tmp, tmp, offset);
   }
   ldx(yz_idx, y, tmp);
@@ -3551,7 +3964,7 @@
   add2_with_carry(product_high, product, carry, yz_idx);
 
   sldi(tmp, idx, LogBytesPerInt);
-  if ( offset ) {
+  if (offset) {
     addi(tmp, tmp, offset);
   }
 #ifdef VM_LITTLE_ENDIAN
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -702,6 +702,27 @@
                        Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
                        Register tmp11, Register tmp12, Register tmp13);
 
+  // CRC32 Intrinsics.
+  void load_reverse_32(Register dst, Register src);
+  int  crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
+  void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
+  void fold_8bit_crc32(Register crc, Register table, Register tmp);
+  void update_byte_crc32(Register crc, Register val, Register table);
+  void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
+                             Register data, bool loopAlignment, bool invertCRC);
+  void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
+                          Register t0,  Register t1,  Register t2,  Register t3,
+                          Register tc0, Register tc1, Register tc2, Register tc3);
+  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
+                          Register t0,  Register t1,  Register t2,  Register t3,
+                          Register tc0, Register tc1, Register tc2, Register tc3);
+  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
+                          Register t0,  Register t1,  Register t2,  Register t3,
+                          Register tc0, Register tc1, Register tc2, Register tc3);
+  void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
+                          Register t0,  Register t1,  Register t2,  Register t3);
+  void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp);
+
   //
   // Debugging
   //
--- a/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -27,6 +27,7 @@
 #include "asm/assembler.inline.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_ppc.hpp"
+#include "oops/klass.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/safepoint.hpp"
 
--- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -2126,6 +2126,54 @@
     return start;
   }
 
+  /**
+   * Arguments:
+   *
+   * Inputs:
+   *   R3_ARG1    - int   crc
+   *   R4_ARG2    - byte* buf
+   *   R5_ARG3    - int   length (of buffer)
+   *
+   * scratch:
+   *   R6_ARG4    - crc table address
+   *   R7_ARG5    - tmp1
+   *   R8_ARG6    - tmp2
+   *
+   * Ouput:
+   *   R3_RET     - int   crc result
+   */
+  // Compute CRC32 function.
+  address generate_CRC32_updateBytes(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();  // Remember stub start address (is rtn value).
+
+    // arguments to kernel_crc32:
+    Register       crc     = R3_ARG1;  // Current checksum, preset by caller or result from previous call.
+    Register       data    = R4_ARG2;  // source byte array
+    Register       dataLen = R5_ARG3;  // #bytes to process
+    Register       table   = R6_ARG4;  // crc table address
+
+    Register       t0      = R9;       // work reg for kernel* emitters
+    Register       t1      = R10;      // work reg for kernel* emitters
+    Register       t2      = R11;      // work reg for kernel* emitters
+    Register       t3      = R12;      // work reg for kernel* emitters
+
+    BLOCK_COMMENT("Stub body {");
+    assert_different_registers(crc, data, dataLen, table);
+
+    StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
+
+    __ kernel_crc32_1byte(crc, data, dataLen, table, t0, t1, t2, t3);
+
+    BLOCK_COMMENT("return");
+    __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
+    __ blr();
+
+    BLOCK_COMMENT("} Stub body");
+    return start;
+  }
+
   // Initialization
   void generate_initial() {
     // Generates all stubs and initializes the entry points
@@ -2144,6 +2192,12 @@
     StubRoutines::_throw_StackOverflowError_entry   =
       generate_throw_exception("StackOverflowError throw_exception",
                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
+
+    // CRC32 Intrinsics.
+    if (UseCRC32Intrinsics) {
+      StubRoutines::_crc_table_adr    = (address)StubRoutines::ppc64::_crc_table;
+      StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
+    }
   }
 
   void generate_all() {
--- a/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,457 @@
  *
  */
 
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+
 // Implementation of the platform-specific part of StubRoutines - for
 // a description of how to extend it, see the stubRoutines.hpp file.
 
 
+#define __ masm->
+
+// CRC32 Intrinsics.
+void StubRoutines::ppc64::generate_load_crc_table_addr(MacroAssembler* masm, Register table) {
+  __ load_const(table, StubRoutines::_crc_table_adr);
+}
+
+// CRC32 Intrinsics.
+/**
+ *  crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.h
+ */
+juint StubRoutines::ppc64::_crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE] = {
+  {
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+#ifdef  CRC32_BYFOUR
+  },
+  {
+    0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+    0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+    0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+    0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+    0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+    0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+    0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+    0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+    0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+    0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+    0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+    0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+    0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+    0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+    0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+    0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+    0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+    0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+    0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+    0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+    0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+    0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+    0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+    0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+    0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+    0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+    0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+    0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+    0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+    0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+    0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+    0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+    0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+    0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+    0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+    0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+    0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+    0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+    0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+    0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+    0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+    0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+    0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+    0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+    0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+    0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+    0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+    0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+    0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+    0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+    0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+    0x9324fd72UL
+  },
+  {
+    0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+    0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+    0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+    0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+    0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+    0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+    0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+    0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+    0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+    0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+    0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+    0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+    0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+    0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+    0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+    0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+    0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+    0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+    0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+    0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+    0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+    0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+    0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+    0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+    0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+    0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+    0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+    0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+    0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+    0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+    0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+    0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+    0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+    0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+    0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+    0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+    0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+    0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+    0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+    0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+    0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+    0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+    0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+    0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+    0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+    0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+    0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+    0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+    0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+    0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+    0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+    0xbe9834edUL
+  },
+  {
+    0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+    0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+    0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+    0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+    0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+    0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+    0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+    0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+    0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+    0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+    0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+    0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+    0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+    0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+    0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+    0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+    0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+    0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+    0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+    0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+    0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+    0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+    0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+    0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+    0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+    0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+    0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+    0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+    0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+    0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+    0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+    0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+    0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+    0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+    0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+    0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+    0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+    0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+    0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+    0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+    0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+    0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+    0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+    0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+    0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+    0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+    0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+    0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+    0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+    0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+    0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+    0xde0506f1UL
+  },
+  {
+    0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+    0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+    0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+    0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+    0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+    0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+    0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+    0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+    0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+    0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+    0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+    0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+    0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+    0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+    0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+    0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+    0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+    0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+    0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+    0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+    0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+    0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+    0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+    0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+    0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+    0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+    0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+    0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+    0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+    0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+    0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+    0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+    0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+    0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+    0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+    0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+    0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+    0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+    0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+    0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+    0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+    0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+    0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+    0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+    0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+    0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+    0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+    0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+    0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+    0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+    0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+    0x8def022dUL
+  },
+  {
+    0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+    0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+    0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+    0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+    0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+    0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+    0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+    0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+    0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+    0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+    0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+    0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+    0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+    0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+    0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+    0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+    0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+    0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+    0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+    0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+    0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+    0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+    0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+    0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+    0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+    0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+    0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+    0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+    0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+    0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+    0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+    0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+    0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+    0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+    0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+    0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+    0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+    0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+    0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+    0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+    0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+    0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+    0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+    0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+    0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+    0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+    0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+    0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+    0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+    0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+    0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+    0x72fd2493UL
+  },
+  {
+    0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+    0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+    0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+    0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+    0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+    0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+    0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+    0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+    0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+    0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+    0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+    0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+    0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+    0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+    0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+    0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+    0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+    0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+    0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+    0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+    0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+    0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+    0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+    0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+    0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+    0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+    0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+    0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+    0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+    0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+    0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+    0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+    0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+    0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+    0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+    0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+    0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+    0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+    0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+    0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+    0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+    0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+    0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+    0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+    0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+    0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+    0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+    0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+    0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+    0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+    0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+    0xed3498beUL
+  },
+  {
+    0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+    0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+    0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+    0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+    0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+    0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+    0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+    0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+    0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+    0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+    0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+    0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+    0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+    0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+    0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+    0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+    0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+    0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+    0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+    0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+    0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+    0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+    0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+    0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+    0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+    0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+    0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+    0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+    0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+    0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+    0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+    0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+    0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+    0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+    0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+    0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+    0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+    0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+    0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+    0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+    0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+    0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+    0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+    0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+    0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+    0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+    0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+    0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+    0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+    0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+    0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+    0xf10605deUL
+#endif
+  }
+};
--- a/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,11 +30,35 @@
 // definition. See stubRoutines.hpp for a description on how to
 // extend it.
 
-static bool    returns_to_call_stub(address return_pc)   { return return_pc == _call_stub_return_address; }
+static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; }
 
 enum platform_dependent_constants {
   code_size1 = 20000,          // simply increase if too small (assembler will crash if too small)
   code_size2 = 20000           // simply increase if too small (assembler will crash if too small)
 };
 
+// CRC32 Intrinsics.
+#define CRC32_COLUMN_SIZE 256
+#define CRC32_BYFOUR
+#ifdef  CRC32_BYFOUR
+  #define CRC32_TABLES 8
+#else
+  #define CRC32_TABLES 1
+#endif
+
+class ppc64 {
+ friend class StubGenerator;
+
+ private:
+
+  // CRC32 Intrinsics.
+  static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
+
+ public:
+
+  // CRC32 Intrinsics.
+  static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
+
+};
+
 #endif // CPU_PPC_VM_STUBROUTINES_PPC_64_HPP
--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -58,7 +58,7 @@
 #define BLOCK_COMMENT(str) __ block_comment(str)
 #endif
 
-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+#define BIND(label)        __ bind(label); BLOCK_COMMENT(#label ":")
 
 //-----------------------------------------------------------------------------
 
@@ -725,7 +725,7 @@
     }
     generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
 
-    __ BIND(continue_after_compile);
+    BIND(continue_after_compile);
     // Reset the _do_not_unlock_if_synchronized flag.
     if (synchronized) {
       __ li(R0, 0);
@@ -785,7 +785,7 @@
   __ ld(signature_handler_fd, method_(signature_handler));
   __ twi_0(signature_handler_fd); // Order wrt. load of klass mirror and entry point (isync is below).
 
-  __ BIND(call_signature_handler);
+  BIND(call_signature_handler);
 
   // Before we call the signature handler we push a new frame to
   // protect the interpreter frame volatile registers when we return
@@ -855,7 +855,7 @@
     __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1);
     // R4_ARG2 = &state->_oop_temp;
     __ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp));
-    __ BIND(method_is_not_static);
+    BIND(method_is_not_static);
   }
 
   // At this point, arguments have been copied off the stack into
@@ -1068,14 +1068,14 @@
   // interpreter will do the correct thing. If it isn't interpreted
   // (call stub/compiled code) we will change our return and continue.
 
-  __ BIND(exception_return_sync_check);
+  BIND(exception_return_sync_check);
 
   if (synchronized) {
     // Don't check for exceptions since we're still in the i2n frame. Do that
     // manually afterwards.
     unlock_method(false);
   }
-  __ BIND(exception_return_sync_check_already_unlocked);
+  BIND(exception_return_sync_check_already_unlocked);
 
   const Register return_pc = R31;
 
@@ -1240,6 +1240,179 @@
   return entry;
 }
 
+// CRC32 Intrinsics.
+//
+// Contract on scratch and work registers.
+// =======================================
+//
+// On ppc, the register set {R2..R12} is available in the interpreter as scratch/work registers.
+// You should, however, keep in mind that {R3_ARG1..R10_ARG8} is the C-ABI argument register set.
+// You can't rely on these registers across calls.
+//
+// The generators for CRC32_update and for CRC32_updateBytes use the
+// scratch/work register set internally, passing the work registers
+// as arguments to the MacroAssembler emitters as required.
+//
+// R3_ARG1..R6_ARG4 are preset to hold the incoming java arguments.
+// Their contents is not constant but may change according to the requirements
+// of the emitted code.
+//
+// All other registers from the scratch/work register set are used "internally"
+// and contain garbage (i.e. unpredictable values) once blr() is reached.
+// Basically, only R3_RET contains a defined value which is the function result.
+//
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+  address start = __ pc();  // Remember stub start address (is rtn value).
+
+  if (UseCRC32Intrinsics) {
+    Label slow_path;
+
+    // Safepoint check
+    const Register sync_state = R11_scratch1;
+    int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
+    __ lwz(sync_state, sync_state_offs, sync_state);
+    __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
+    __ bne(CCR0, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we not even call stub code (we generate the code inline)
+    // and there is no safepoint on this path.
+
+    // Load java parameters.
+    // R15_esp is callers operand stack pointer, i.e. it points to the parameters.
+    const Register argP    = R15_esp;
+    const Register crc     = R3_ARG1;  // crc value
+    const Register data    = R4_ARG2;  // address of java byte value (kernel_crc32 needs address)
+    const Register dataLen = R5_ARG3;  // source data len (1 byte). Not used because calling the single-byte emitter.
+    const Register table   = R6_ARG4;  // address of crc32 table
+    const Register tmp     = dataLen;  // Reuse unused len register to show we don't actually need a separate tmp here.
+
+    BLOCK_COMMENT("CRC32_update {");
+
+    // Arguments are reversed on java expression stack
+#ifdef VM_LITTLE_ENDIAN
+    __ addi(data, argP, 0+1*wordSize); // (stack) address of byte value. Emitter expects address, not value.
+                                       // Being passed as an int, the single byte is at offset +0.
+#else
+    __ addi(data, argP, 3+1*wordSize); // (stack) address of byte value. Emitter expects address, not value.
+                                       // Being passed from java as an int, the single byte is at offset +3.
+#endif
+    __ lwz(crc,  2*wordSize, argP);    // Current crc state, zero extend to 64 bit to have a clean register.
+
+    StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
+    __ kernel_crc32_singleByte(crc, data, dataLen, table, tmp);
+
+    // Restore caller sp for c2i case and return.
+    __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
+    __ blr();
+
+    // Generate a vanilla native entry as the slow path.
+    BLOCK_COMMENT("} CRC32_update");
+    BIND(slow_path);
+  }
+
+  (void) generate_native_entry(false);
+
+  return start;
+}
+
+// CRC32 Intrinsics.
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(     int crc, byte[] b,  int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  address start = __ pc();  // Remember stub start address (is rtn value).
+
+  if (UseCRC32Intrinsics) {
+    Label slow_path;
+
+    // Safepoint check
+    const Register sync_state = R11_scratch1;
+    int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
+    __ lwz(sync_state, sync_state_offs, sync_state);
+    __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
+    __ bne(CCR0, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we not even call stub code (we generate the code inline)
+    // and there is no safepoint on this path.
+
+    // Load parameters.
+    // Z_esp is callers operand stack pointer, i.e. it points to the parameters.
+    const Register argP    = R15_esp;
+    const Register crc     = R3_ARG1;  // crc value
+    const Register data    = R4_ARG2;  // address of java byte array
+    const Register dataLen = R5_ARG3;  // source data len
+    const Register table   = R6_ARG4;  // address of crc32 table
+
+    const Register t0      = R9;       // scratch registers for crc calculation
+    const Register t1      = R10;
+    const Register t2      = R11;
+    const Register t3      = R12;
+
+    const Register tc0     = R2;       // registers to hold pre-calculated column addresses
+    const Register tc1     = R7;
+    const Register tc2     = R8;
+    const Register tc3     = table;    // table address is reconstructed at the end of kernel_crc32_* emitters
+
+    const Register tmp     = t0;       // Only used very locally to calculate byte buffer address.
+
+    // Arguments are reversed on java expression stack.
+    // Calculate address of start element.
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct".
+      BLOCK_COMMENT("CRC32_updateByteBuffer {");
+      // crc     @ (SP + 5W) (32bit)
+      // buf     @ (SP + 3W) (64bit ptr to long array)
+      // off     @ (SP + 2W) (32bit)
+      // dataLen @ (SP + 1W) (32bit)
+      // data = buf + off
+      __ ld(  data,    3*wordSize, argP);  // start of byte buffer
+      __ lwa( tmp,     2*wordSize, argP);  // byte buffer offset
+      __ lwa( dataLen, 1*wordSize, argP);  // #bytes to process
+      __ lwz( crc,     5*wordSize, argP);  // current crc state
+      __ add( data, data, tmp);            // Add byte buffer offset.
+    } else {                                                         // Used for "updateBytes update".
+      BLOCK_COMMENT("CRC32_updateBytes {");
+      // crc     @ (SP + 4W) (32bit)
+      // buf     @ (SP + 3W) (64bit ptr to byte array)
+      // off     @ (SP + 2W) (32bit)
+      // dataLen @ (SP + 1W) (32bit)
+      // data = buf + off + base_offset
+      __ ld(  data,    3*wordSize, argP);  // start of byte buffer
+      __ lwa( tmp,     2*wordSize, argP);  // byte buffer offset
+      __ lwa( dataLen, 1*wordSize, argP);  // #bytes to process
+      __ add( data, data, tmp);            // add byte buffer offset
+      __ lwz( crc,     4*wordSize, argP);  // current crc state
+      __ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE));
+    }
+
+    StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
+
+    // Performance measurements show the 1word and 2word variants to be almost equivalent,
+    // with very light advantages for the 1word variant. We chose the 1word variant for
+    // code compactness.
+    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3);
+
+    // Restore caller sp for c2i case and return.
+    __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
+    __ blr();
+
+    // Generate a vanilla native entry as the slow path.
+    BLOCK_COMMENT("} CRC32_updateBytes(Buffer)");
+    BIND(slow_path);
+  }
+
+  (void) generate_native_entry(false);
+
+  return start;
+}
+
 // These should never be compiled since the interpreter will prefer
 // the compiled version to the intrinsic version.
 bool AbstractInterpreter::can_be_compiled(methodHandle m) {
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -159,10 +159,18 @@
 
   assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
 
-  if (UseCRC32Intrinsics) {
-    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
-      warning("CRC32 intrinsics  are not available on this CPU");
-    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+  // Implementation does not use any of the vector instructions
+  // available with Power8. Their exploitation is still pending.
+  if (!UseCRC32Intrinsics) {
+    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+      FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
+    }
+  }
+
+  if (UseCRC32CIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
+      warning("CRC32C intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }
 
   // The AES intrinsic stubs require AES instruction support.
@@ -192,12 +200,6 @@
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
 
-  if (UseCRC32CIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
-      warning("CRC32C intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
-  }
-
   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
     UseMultiplyToLenIntrinsic = true;
   }
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -30,6 +30,7 @@
 #include "interpreter/interpreter.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
+#include "oops/klass.inline.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/interfaceSupport.hpp"
--- a/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -26,6 +26,7 @@
 #include "asm/assembler.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_sparc.hpp"
+#include "oops/klass.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/safepoint.hpp"
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -31,6 +31,7 @@
 #include "interpreter/interpreter.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
+#include "oops/klass.inline.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/interfaceSupport.hpp"
--- a/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -26,6 +26,7 @@
 #include "asm/macroAssembler.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_x86.hpp"
+#include "oops/klass.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/safepoint.hpp"
 
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -285,7 +285,7 @@
     CPU_AVX512BW = (1 << 31)
   } cpuFeatureFlags;
 
-#define CPU_AVX512VL 0x100000000 // EVEX instructions with smaller vector length : enums are limited to 32bit
+#define CPU_AVX512VL UCONST64(0x100000000) // EVEX instructions with smaller vector length : enums are limited to 32bit
 
   enum {
     // AMD
--- a/hotspot/src/os/aix/vm/os_aix.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/os/aix/vm/os_aix.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1655,13 +1655,6 @@
 }
 
 void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) {
-  // cpu
-  st->print("CPU:");
-  st->print("total %d", os::processor_count());
-  // It's not safe to query number of active processors after crash
-  // st->print("(active %d)", os::active_processor_count());
-  st->print(" %s", VM_Version::cpu_features());
-  st->cr();
 }
 
 void os::print_siginfo(outputStream* st, void* siginfo) {
@@ -3483,7 +3476,6 @@
   // For now UseLargePages is just ignored.
   FLAG_SET_ERGO(bool, UseLargePages, false);
   _page_sizes[0] = 0;
-  _large_page_size = -1;
 
   // debug trace
   trcVerbose("os::vm_page_size %s\n", describe_pagesize(os::vm_page_size()));
--- a/hotspot/src/share/vm/ci/ciField.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/ci/ciField.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -181,6 +181,17 @@
     return (holder()->is_subclass_of(callsite_klass) && (name() == ciSymbol::target_name()));
   }
 
+  bool is_autobox_cache() {
+    ciSymbol* klass_name = holder()->name();
+    return (name() == ciSymbol::cache_field_name() &&
+            holder()->uses_default_loader() &&
+            (klass_name == ciSymbol::java_lang_Character_CharacterCache() ||
+             klass_name == ciSymbol::java_lang_Byte_ByteCache() ||
+             klass_name == ciSymbol::java_lang_Short_ShortCache() ||
+             klass_name == ciSymbol::java_lang_Integer_IntegerCache() ||
+             klass_name == ciSymbol::java_lang_Long_LongCache()));
+  }
+
   // Debugging output
   void print();
   void print_name_on(outputStream* st);
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -2023,10 +2023,10 @@
   bool lvt_allocated = false;
   u2 max_lvt_cnt = INITIAL_MAX_LVT_NUMBER;
   u2 max_lvtt_cnt = INITIAL_MAX_LVT_NUMBER;
-  u2* localvariable_table_length;
-  u2** localvariable_table_start;
-  u2* localvariable_type_table_length;
-  u2** localvariable_type_table_start;
+  u2* localvariable_table_length = NULL;
+  u2** localvariable_table_start = NULL;
+  u2* localvariable_type_table_length = NULL;
+  u2** localvariable_type_table_start = NULL;
   int method_parameters_length = -1;
   u1* method_parameters_data = NULL;
   bool method_parameters_seen = false;
@@ -4171,10 +4171,13 @@
         }
       }
 
+#ifdef ASSERT
       if (CheckIntrinsics) {
         // Check for orphan methods in the current class. A method m
         // of a class C is orphan if an intrinsic is defined for method m,
         // but class C does not declare m.
+        // The check is potentially expensive, therefore it is available
+        // only in debug builds.
 
         for (int id = vmIntrinsics::FIRST_ID; id < (int)vmIntrinsics::ID_LIMIT; id++) {
           if (id == vmIntrinsics::_compiledLambdaForm) {
@@ -4210,8 +4213,10 @@
           }
         }
       }
+#endif // ASSERT
     }
 
+
     if (cached_class_file != NULL) {
       // JVMTI: we have an InstanceKlass now, tell it about the cached bytes
       this_klass->set_cached_class_file(cached_class_file);
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -658,7 +658,10 @@
 // annotation. If CheckIntrinsics is enabled, the VM performs the following
 // checks when a class C is loaded: (1) all intrinsics defined by the VM for
 // class C are present in the loaded class file and are marked;
-// (2) an intrinsic is defined by the VM for all marked methods of class C.
+// (2) an intrinsic is defined by the VM for all marked methods of class C;
+// (3) check for orphan methods in class C (i.e., methods for which the VM
+// declares an intrinsic but that are not declared for the loaded class C.
+// Check (3) is available only in debug builds.
 //
 // If a mismatch is detected for a method, the VM behaves differently depending
 // on the type of build. A fastdebug build exits and reports an error on a mismatch.
@@ -679,6 +682,10 @@
    do_name(     getClass_name,                                   "getClass")                                            \
   do_intrinsic(_clone,                    java_lang_Object,       clone_name, void_object_signature,             F_R)   \
    do_name(     clone_name,                                      "clone")                                               \
+  do_intrinsic(_notify,                   java_lang_Object,       notify_name, void_method_signature,            F_R)   \
+   do_name(     notify_name,                                     "notify")                                              \
+  do_intrinsic(_notifyAll,                java_lang_Object,       notifyAll_name, void_method_signature,         F_R)   \
+   do_name(     notifyAll_name,                                  "notifyAll")                                           \
                                                                                                                         \
   /* Math & StrictMath intrinsics are defined in terms of just a few signatures: */                                     \
   do_class(java_lang_Math,                "java/lang/Math")                                                             \
--- a/hotspot/src/share/vm/gc/g1/g1Allocator.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1Allocator.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -44,52 +44,52 @@
   size_t _summary_bytes_used;
 
 public:
-   G1Allocator(G1CollectedHeap* heap) :
-     _g1h(heap), _summary_bytes_used(0) { }
+  G1Allocator(G1CollectedHeap* heap) :
+    _g1h(heap), _summary_bytes_used(0) { }
 
-   static G1Allocator* create_allocator(G1CollectedHeap* g1h);
+  static G1Allocator* create_allocator(G1CollectedHeap* g1h);
 
-   virtual void init_mutator_alloc_region() = 0;
-   virtual void release_mutator_alloc_region() = 0;
+  virtual void init_mutator_alloc_region() = 0;
+  virtual void release_mutator_alloc_region() = 0;
 
-   virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
-   virtual void release_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
-   virtual void abandon_gc_alloc_regions() = 0;
+  virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
+  virtual void release_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
+  virtual void abandon_gc_alloc_regions() = 0;
 
-   virtual MutatorAllocRegion*    mutator_alloc_region(AllocationContext_t context) = 0;
-   virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(AllocationContext_t context) = 0;
-   virtual OldGCAllocRegion*      old_gc_alloc_region(AllocationContext_t context) = 0;
-   virtual size_t                 used() = 0;
-   virtual bool                   is_retained_old_region(HeapRegion* hr) = 0;
+  virtual MutatorAllocRegion*    mutator_alloc_region(AllocationContext_t context) = 0;
+  virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(AllocationContext_t context) = 0;
+  virtual OldGCAllocRegion*      old_gc_alloc_region(AllocationContext_t context) = 0;
+  virtual size_t                 used() = 0;
+  virtual bool                   is_retained_old_region(HeapRegion* hr) = 0;
 
-   void                           reuse_retained_old_region(EvacuationInfo& evacuation_info,
-                                                            OldGCAllocRegion* old,
-                                                            HeapRegion** retained);
+  void                           reuse_retained_old_region(EvacuationInfo& evacuation_info,
+                                                           OldGCAllocRegion* old,
+                                                           HeapRegion** retained);
 
-   size_t used_unlocked() const {
-     return _summary_bytes_used;
-   }
+  size_t used_unlocked() const {
+    return _summary_bytes_used;
+  }
 
-   void increase_used(size_t bytes) {
-     _summary_bytes_used += bytes;
-   }
+  void increase_used(size_t bytes) {
+    _summary_bytes_used += bytes;
+  }
 
-   void decrease_used(size_t bytes) {
-     assert(_summary_bytes_used >= bytes,
-            err_msg("invariant: _summary_bytes_used: " SIZE_FORMAT " should be >= bytes: " SIZE_FORMAT,
-                _summary_bytes_used, bytes));
-     _summary_bytes_used -= bytes;
-   }
+  void decrease_used(size_t bytes) {
+    assert(_summary_bytes_used >= bytes,
+           err_msg("invariant: _summary_bytes_used: " SIZE_FORMAT " should be >= bytes: " SIZE_FORMAT,
+               _summary_bytes_used, bytes));
+    _summary_bytes_used -= bytes;
+  }
 
-   void set_used(size_t bytes) {
-     _summary_bytes_used = bytes;
-   }
+  void set_used(size_t bytes) {
+    _summary_bytes_used = bytes;
+  }
 
-   virtual HeapRegion* new_heap_region(uint hrs_index,
-                                       G1BlockOffsetSharedArray* sharedOffsetArray,
-                                       MemRegion mr) {
-     return new HeapRegion(hrs_index, sharedOffsetArray, mr);
-   }
+  virtual HeapRegion* new_heap_region(uint hrs_index,
+                                      G1BlockOffsetSharedArray* sharedOffsetArray,
+                                      MemRegion mr) {
+    return new HeapRegion(hrs_index, sharedOffsetArray, mr);
+  }
 };
 
 // The default allocator for G1.
--- a/hotspot/src/share/vm/opto/compile.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/compile.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -140,6 +140,9 @@
   bool is_debug()                 const          { return _debug; }
   void set_debug(bool debug)                     { _debug = debug; }
   static const char* debug_option_name;
+
+  bool same_idx(node_idx_t k1, node_idx_t k2)  const { return idx(k1) == idx(k2); }
+  bool same_gen(node_idx_t k1, node_idx_t k2)  const { return gen(k1) == gen(k2); }
 };
 
 //------------------------------Compile----------------------------------------
--- a/hotspot/src/share/vm/opto/escape.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/escape.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -526,7 +526,7 @@
         if (adr->is_BoxLock())
           break;
         // Stored value escapes in unsafe access.
-        if ((opcode == Op_StoreP) && (adr_type == TypeRawPtr::BOTTOM)) {
+        if ((opcode == Op_StoreP) && adr_type->isa_rawptr()) {
           // Pointer stores in G1 barriers looks like unsafe access.
           // Ignore such stores to be able scalar replace non-escaping
           // allocations.
@@ -540,11 +540,11 @@
                 int offs = (int)igvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
                 if (offs == in_bytes(JavaThread::satb_mark_queue_offset() +
                                      PtrQueue::byte_offset_of_buf())) {
-                  break; // G1 pre barier previous oop value store.
+                  break; // G1 pre barrier previous oop value store.
                 }
                 if (offs == in_bytes(JavaThread::dirty_card_queue_offset() +
                                      PtrQueue::byte_offset_of_buf())) {
-                  break; // G1 post barier card address store.
+                  break; // G1 post barrier card address store.
                 }
               }
             }
@@ -725,7 +725,7 @@
         assert(ptn != NULL, "node should be registered");
         add_edge(adr_ptn, ptn);
         break;
-      } else if ((opcode == Op_StoreP) && (adr_type == TypeRawPtr::BOTTOM)) {
+      } else if ((opcode == Op_StoreP) && adr_type->isa_rawptr()) {
         // Stored value escapes in unsafe access.
         Node *val = n->in(MemNode::ValueIn);
         PointsToNode* ptn = ptnode_adr(val->_idx);
--- a/hotspot/src/share/vm/opto/library_call.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -225,6 +225,7 @@
   bool inline_pow();
   Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
   bool inline_min_max(vmIntrinsics::ID id);
+  bool inline_notify(vmIntrinsics::ID id);
   Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
   // This returns Type::AnyPtr, RawPtr, or OopPtr.
   int classify_unsafe_addr(Node* &base, Node* &offset);
@@ -776,6 +777,13 @@
   case vmIntrinsics::_min:
   case vmIntrinsics::_max:                      return inline_min_max(intrinsic_id());
 
+  case vmIntrinsics::_notify:
+  case vmIntrinsics::_notifyAll:
+    if (InlineNotify) {
+      return inline_notify(intrinsic_id());
+    }
+    return false;
+
   case vmIntrinsics::_addExactI:                return inline_math_addExactI(false /* add */);
   case vmIntrinsics::_addExactL:                return inline_math_addExactL(false /* add */);
   case vmIntrinsics::_decrementExactI:          return inline_math_subtractExactI(true /* decrement */);
@@ -2075,6 +2083,21 @@
           );
 }
 
+//----------------------------inline_notify-----------------------------------*
+bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {
+  const TypeFunc* ftype = OptoRuntime::monitor_notify_Type();
+  address func;
+  if (id == vmIntrinsics::_notify) {
+    func = OptoRuntime::monitor_notify_Java();
+  } else {
+    func = OptoRuntime::monitor_notifyAll_Java();
+  }
+  Node* call = make_runtime_call(RC_NO_LEAF, ftype, func, NULL, TypeRawPtr::BOTTOM, argument(0));
+  make_slow_call_ex(call, env()->Throwable_klass(), false);
+  return true;
+}
+
+
 //----------------------------inline_min_max-----------------------------------
 bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
   set_result(generate_min_max(id, argument(0), argument(1)));
@@ -2687,35 +2710,48 @@
   // of safe & unsafe memory.
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
 
-  if (!is_store) {
-    MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
-    // To be valid, unsafe loads may depend on other conditions than
-    // the one that guards them: pin the Load node
-    Node* p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile);
-    // load value
-    switch (type) {
-    case T_BOOLEAN:
-    case T_CHAR:
-    case T_BYTE:
-    case T_SHORT:
-    case T_INT:
-    case T_LONG:
-    case T_FLOAT:
-    case T_DOUBLE:
-      break;
-    case T_OBJECT:
-      if (need_read_barrier) {
-        insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar));
+   if (!is_store) {
+    Node* p = NULL;
+    // Try to constant fold a load from a constant field
+    ciField* field = alias_type->field();
+    if (heap_base_oop != top() &&
+        field != NULL && field->is_constant() && field->layout_type() == type) {
+      // final or stable field
+      const Type* con_type = Type::make_constant(alias_type->field(), heap_base_oop);
+      if (con_type != NULL) {
+        p = makecon(con_type);
       }
-      break;
-    case T_ADDRESS:
-      // Cast to an int type.
-      p = _gvn.transform(new CastP2XNode(NULL, p));
-      p = ConvX2UL(p);
-      break;
-    default:
-      fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
-      break;
+    }
+    if (p == NULL) {
+      MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
+      // To be valid, unsafe loads may depend on other conditions than
+      // the one that guards them: pin the Load node
+      p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile);
+      // load value
+      switch (type) {
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+      case T_LONG:
+      case T_FLOAT:
+      case T_DOUBLE:
+        break;
+      case T_OBJECT:
+        if (need_read_barrier) {
+          insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar));
+        }
+        break;
+      case T_ADDRESS:
+        // Cast to an int type.
+        p = _gvn.transform(new CastP2XNode(NULL, p));
+        p = ConvX2UL(p);
+        break;
+      default:
+        fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
+        break;
+      }
     }
     // The load node has the control of the preceding MemBarCPUOrder.  All
     // following nodes will have the control of the MemBarCPUOrder inserted at
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -3682,7 +3682,6 @@
 }
 
 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
-  CloneMap& cm = C->clone_map();
   loop->dump_head();
 
   // Now scan for CFG nodes in the same loop
@@ -3714,7 +3713,6 @@
         cached_idom = find_non_split_ctrl(cached_idom);
       }
       tty->print(" ID:%d",computed_idom->_idx);
-      cm.dump(n->_idx);
       n->dump();
       if( cached_idom != computed_idom ) {
         tty->print_cr("*** BROKEN IDOM!  Computed as: %d, cached as: %d",
@@ -3734,7 +3732,6 @@
           for( uint j = 0; j < loop->_nest; j++ )
             tty->print("  ");
           tty->print(" ");
-          cm.dump(m->_idx);
           m->dump();
         }
       }
--- a/hotspot/src/share/vm/opto/parse.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/parse.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -539,10 +539,6 @@
   void do_get_xxx(Node* obj, ciField* field, bool is_field);
   void do_put_xxx(Node* obj, ciField* field, bool is_field);
 
-  // loading from a constant field or the constant pool
-  // returns false if push failed (non-perm field constants only, not ldcs)
-  bool push_constant(ciConstant con, bool require_constant = false, bool is_autobox_cache = false, const Type* basic_type = NULL);
-
   // implementation of object creation bytecodes
   void emit_guard_for_new(ciInstanceKlass* klass);
   void do_new();
--- a/hotspot/src/share/vm/opto/parse2.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/parse2.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1478,8 +1478,10 @@
       }
       assert(constant.basic_type() != T_OBJECT || constant.as_object()->is_instance(),
              "must be java_mirror of klass");
-      bool pushed = push_constant(constant, true);
-      guarantee(pushed, "must be possible to push this constant");
+      const Type* con_type = Type::make_from_constant(constant);
+      if (con_type != NULL) {
+        push_node(con_type->basic_type(), makecon(con_type));
+      }
     }
 
     break;
--- a/hotspot/src/share/vm/opto/parse3.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/parse3.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -149,51 +149,10 @@
   // Does this field have a constant value?  If so, just push the value.
   if (field->is_constant()) {
     // final or stable field
-    const Type* stable_type = NULL;
-    if (FoldStableValues && field->is_stable()) {
-      stable_type = Type::get_const_type(field->type());
-      if (field->type()->is_array_klass()) {
-        int stable_dimension = field->type()->as_array_klass()->dimension();
-        stable_type = stable_type->is_aryptr()->cast_to_stable(true, stable_dimension);
-      }
-    }
-    if (field->is_static()) {
-      // final static field
-      if (C->eliminate_boxing()) {
-        // The pointers in the autobox arrays are always non-null.
-        ciSymbol* klass_name = field->holder()->name();
-        if (field->name() == ciSymbol::cache_field_name() &&
-            field->holder()->uses_default_loader() &&
-            (klass_name == ciSymbol::java_lang_Character_CharacterCache() ||
-             klass_name == ciSymbol::java_lang_Byte_ByteCache() ||
-             klass_name == ciSymbol::java_lang_Short_ShortCache() ||
-             klass_name == ciSymbol::java_lang_Integer_IntegerCache() ||
-             klass_name == ciSymbol::java_lang_Long_LongCache())) {
-          bool require_const = true;
-          bool autobox_cache = true;
-          if (push_constant(field->constant_value(), require_const, autobox_cache)) {
-            return;
-          }
-        }
-      }
-      if (push_constant(field->constant_value(), false, false, stable_type))
-        return;
-    } else {
-      // final or stable non-static field
-      // Treat final non-static fields of trusted classes (classes in
-      // java.lang.invoke and sun.invoke packages and subpackages) as
-      // compile time constants.
-      if (obj->is_Con()) {
-        const TypeOopPtr* oop_ptr = obj->bottom_type()->isa_oopptr();
-        ciObject* constant_oop = oop_ptr->const_oop();
-        ciConstant constant = field->constant_value_of(constant_oop);
-        if (FoldStableValues && field->is_stable() && constant.is_null_or_zero()) {
-          // fall through to field load; the field is not yet initialized
-        } else {
-          if (push_constant(constant, true, false, stable_type))
-            return;
-        }
-      }
+    const Type* con_type = Type::make_constant(field, obj);
+    if (con_type != NULL) {
+      push_node(con_type->basic_type(), makecon(con_type));
+      return;
     }
   }
 
@@ -362,39 +321,6 @@
   }
 }
 
-
-
-bool Parse::push_constant(ciConstant constant, bool require_constant, bool is_autobox_cache, const Type* stable_type) {
-  const Type* con_type = Type::make_from_constant(constant, require_constant, is_autobox_cache);
-  switch (constant.basic_type()) {
-  case T_ARRAY:
-  case T_OBJECT:
-    // cases:
-    //   can_be_constant    = (oop not scavengable || ScavengeRootsInCode != 0)
-    //   should_be_constant = (oop not scavengable || ScavengeRootsInCode >= 2)
-    // An oop is not scavengable if it is in the perm gen.
-    if (stable_type != NULL && con_type != NULL && con_type->isa_oopptr())
-      con_type = con_type->join_speculative(stable_type);
-    break;
-
-  case T_ILLEGAL:
-    // Invalid ciConstant returned due to OutOfMemoryError in the CI
-    assert(C->env()->failing(), "otherwise should not see this");
-    // These always occur because of object types; we are going to
-    // bail out anyway, so make the stack depths match up
-    push( zerocon(T_OBJECT) );
-    return false;
-  }
-
-  if (con_type == NULL)
-    // we cannot inline the oop, but we can use it later to narrow a type
-    return false;
-
-  push_node(constant.basic_type(), makecon(con_type));
-  return true;
-}
-
-
 //=============================================================================
 void Parse::do_anewarray() {
   bool will_link;
--- a/hotspot/src/share/vm/opto/runtime.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -96,6 +96,8 @@
 address OptoRuntime::_g1_wb_post_Java                             = NULL;
 address OptoRuntime::_vtable_must_compile_Java                    = NULL;
 address OptoRuntime::_complete_monitor_locking_Java               = NULL;
+address OptoRuntime::_monitor_notify_Java                         = NULL;
+address OptoRuntime::_monitor_notifyAll_Java                      = NULL;
 address OptoRuntime::_rethrow_Java                                = NULL;
 
 address OptoRuntime::_slow_arraycopy_Java                         = NULL;
@@ -144,6 +146,8 @@
   gen(env, _g1_wb_pre_Java                 , g1_wb_pre_Type               , SharedRuntime::g1_wb_pre        ,    0 , false, false, false);
   gen(env, _g1_wb_post_Java                , g1_wb_post_Type              , SharedRuntime::g1_wb_post       ,    0 , false, false, false);
   gen(env, _complete_monitor_locking_Java  , complete_monitor_enter_Type  , SharedRuntime::complete_monitor_locking_C, 0, false, false, false);
+  gen(env, _monitor_notify_Java            , monitor_notify_Type          , monitor_notify_C                ,    0 , false, false, false);
+  gen(env, _monitor_notifyAll_Java         , monitor_notify_Type          , monitor_notifyAll_C             ,    0 , false, false, false);
   gen(env, _rethrow_Java                   , rethrow_Type                 , rethrow_C                       ,    2 , true , false, true );
 
   gen(env, _slow_arraycopy_Java            , slow_arraycopy_Type          , SharedRuntime::slow_arraycopy_C ,    0 , false, false, false);
@@ -426,6 +430,45 @@
   thread->set_vm_result(obj);
 JRT_END
 
+JRT_BLOCK_ENTRY(void, OptoRuntime::monitor_notify_C(oopDesc* obj, JavaThread *thread))
+
+  // Very few notify/notifyAll operations find any threads on the waitset, so
+  // the dominant fast-path is to simply return.
+  // Relatedly, it's critical that notify/notifyAll be fast in order to
+  // reduce lock hold times.
+  if (!SafepointSynchronize::is_synchronizing()) {
+    if (ObjectSynchronizer::quick_notify(obj, thread, false)) {
+      return;
+    }
+  }
+
+  // This is the case the fast-path above isn't provisioned to handle.
+  // The fast-path is designed to handle frequently arising cases in an efficient manner.
+  // (The fast-path is just a degenerate variant of the slow-path).
+  // Perform the dreaded state transition and pass control into the slow-path.
+  JRT_BLOCK;
+  Handle h_obj(THREAD, obj);
+  ObjectSynchronizer::notify(h_obj, CHECK);
+  JRT_BLOCK_END;
+JRT_END
+
+JRT_BLOCK_ENTRY(void, OptoRuntime::monitor_notifyAll_C(oopDesc* obj, JavaThread *thread))
+
+  if (!SafepointSynchronize::is_synchronizing() ) {
+    if (ObjectSynchronizer::quick_notify(obj, thread, true)) {
+      return;
+    }
+  }
+
+  // This is the case the fast-path above isn't provisioned to handle.
+  // The fast-path is designed to handle frequently arising cases in an efficient manner.
+  // (The fast-path is just a degenerate variant of the slow-path).
+  // Perform the dreaded state transition and pass control into the slow-path.
+  JRT_BLOCK;
+  Handle h_obj(THREAD, obj);
+  ObjectSynchronizer::notifyall(h_obj, CHECK);
+  JRT_BLOCK_END;
+JRT_END
 
 const TypeFunc *OptoRuntime::new_instance_Type() {
   // create input type (domain)
@@ -604,14 +647,26 @@
   fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL;  // Object to be Locked
   fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM;    // Address of stack location for lock - BasicLock
   fields[TypeFunc::Parms+2] = TypeRawPtr::BOTTOM;    // Thread pointer (Self)
-  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3,fields);
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3, fields);
 
   // create result type (range)
   fields = TypeTuple::fields(0);
 
-  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+  return TypeFunc::make(domain, range);
+}
 
-  return TypeFunc::make(domain,range);
+const TypeFunc *OptoRuntime::monitor_notify_Type() {
+  // create input type (domain)
+  const Type **fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL;  // Object to be Locked
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(0);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+  return TypeFunc::make(domain, range);
 }
 
 const TypeFunc* OptoRuntime::flush_windows_Type() {
--- a/hotspot/src/share/vm/opto/runtime.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -146,6 +146,8 @@
   static address _vtable_must_compile_Java;
   static address _complete_monitor_locking_Java;
   static address _rethrow_Java;
+  static address _monitor_notify_Java;
+  static address _monitor_notifyAll_Java;
 
   static address _slow_arraycopy_Java;
   static address _register_finalizer_Java;
@@ -186,6 +188,9 @@
   static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
   static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
 
+  static void monitor_notify_C(oopDesc* obj, JavaThread* thread);
+  static void monitor_notifyAll_C(oopDesc* obj, JavaThread* thread);
+
 private:
 
   // Implicit exception support
@@ -244,7 +249,9 @@
   static address g1_wb_pre_Java()                        { return _g1_wb_pre_Java; }
   static address g1_wb_post_Java()                       { return _g1_wb_post_Java; }
   static address vtable_must_compile_stub()              { return _vtable_must_compile_Java; }
-  static address complete_monitor_locking_Java()         { return _complete_monitor_locking_Java;   }
+  static address complete_monitor_locking_Java()         { return _complete_monitor_locking_Java; }
+  static address monitor_notify_Java()                   { return _monitor_notify_Java; }
+  static address monitor_notifyAll_Java()                { return _monitor_notifyAll_Java; }
 
   static address slow_arraycopy_Java()                   { return _slow_arraycopy_Java; }
   static address register_finalizer_Java()               { return _register_finalizer_Java; }
@@ -285,6 +292,7 @@
   static const TypeFunc* g1_wb_post_Type();
   static const TypeFunc* complete_monitor_enter_Type();
   static const TypeFunc* complete_monitor_exit_Type();
+  static const TypeFunc* monitor_notify_Type();
   static const TypeFunc* uncommon_trap_Type();
   static const TypeFunc* athrow_Type();
   static const TypeFunc* rethrow_Type();
--- a/hotspot/src/share/vm/opto/superword.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/superword.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -74,9 +74,15 @@
   _do_vector_loop(phase->C->do_vector_loop()),  // whether to do vectorization/simd style
   _ii_first(-1),                          // first loop generation index - only if do_vector_loop()
   _ii_last(-1),                           // last loop generation index - only if do_vector_loop()
-  _ii_order(arena(), 8, 0, 0),
-  _vector_loop_debug(phase->C->has_method() && phase->C->method_has_option("VectorizeDebug"))
-{}
+  _ii_order(arena(), 8, 0, 0)
+{
+#ifndef PRODUCT
+  _vector_loop_debug = 0;
+  if (_phase->C->method() != NULL) {
+    _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug);
+  }
+#endif
+}
 
 //------------------------------transform_loop---------------------------
 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
@@ -90,7 +96,6 @@
   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
 
   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
-
   // Check for no control flow in body (other than exit)
   Node *cl_exit = cl->loopexit();
   if (cl_exit->in(0) != lpt->_head) return;
@@ -425,13 +430,15 @@
       // this reference to a vector-aligned address.
       best_align_to_mem_ref = mem_ref;
       best_iv_adjustment = iv_adjustment;
+      NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
     }
 
     SWPointer align_to_ref_p(mem_ref, this, NULL, false);
     // Set alignment relative to "align_to_ref" for all related memory operations.
     for (int i = memops.size() - 1; i >= 0; i--) {
       MemNode* s = memops.at(i)->as_Mem();
-      if (isomorphic(s, mem_ref)) {
+      if (isomorphic(s, mem_ref) &&
+           (!_do_vector_loop || same_origin_idx(s, mem_ref))) {
         SWPointer p2(s, this, NULL, false);
         if (p2.comparable(align_to_ref_p)) {
           int align = memory_alignment(s, iv_adjustment);
@@ -496,7 +503,7 @@
               Node_List* pair = new Node_List();
               pair->push(s1);
               pair->push(s2);
-              if (!_do_vector_loop || _clone_map.idx(s1->_idx) == _clone_map.idx(s2->_idx)) {
+              if (!_do_vector_loop || same_origin_idx(s1, s2)) {
                 _packset.append(pair);
               }
             }
@@ -533,8 +540,12 @@
           memops.push(s);
         }
         MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
-        if (best_align_to_mem_ref == NULL) break;
+        if (best_align_to_mem_ref == NULL) {
+          NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");)
+          break;
+        }
         best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+        NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
         // Restore list.
         while (memops.size() > orig_msize)
           (void)memops.pop();
@@ -560,6 +571,16 @@
 #endif
 }
 
+#ifndef PRODUCT
+void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) {
+  if (is_trace_adjacent()) {
+    tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d",
+       best_align_to_mem_ref->_idx, best_iv_adjustment);
+       best_align_to_mem_ref->dump();
+  }
+}
+#endif
+
 //------------------------------find_align_to_ref---------------------------
 // Find a memory reference to align the loop induction variable to.
 // Looks first at stores then at loads, looking for a memory reference
@@ -756,9 +777,11 @@
   }
 
 #ifndef PRODUCT
-  if (TraceSuperWord)
-    tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
-                  offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
+  if (TraceSuperWord) {
+    tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
+      mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
+    mem_ref->dump();
+  }
 #endif
   return iv_adjustment;
 }
@@ -863,12 +886,14 @@
   Node* n = start;
   Node* prev = NULL;
   while (true) {
+    NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);)
     assert(in_bb(n), "must be in block");
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* out = n->fast_out(i);
       if (out->is_Load()) {
         if (in_bb(out)) {
           preds.push(out);
+          NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);)
         }
       } else {
         // FIXME
@@ -883,10 +908,11 @@
         } else {
           assert(out == prev || prev == NULL, "no branches off of store slice");
         }
-      }
-    }
+      }//else
+    }//for
     if (n == stop) break;
     preds.push(n);
+    NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);)
     prev = n;
     assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
     n = n->in(MemNode::Memory);
@@ -2140,18 +2166,38 @@
     } else {
       _stk.pop(); // Remove post-visited node from stack
     }
-  }
-
+  }//while
+
+  int ii_current = -1;
+  unsigned int load_idx = (unsigned int)-1;
+  _ii_order.clear();
   // Create real map of block indices for nodes
   for (int j = 0; j < _block.length(); j++) {
     Node* n = _block.at(j);
     set_bb_idx(n, j);
-  }
+    if (_do_vector_loop && n->is_Load()) {
+      if (ii_current == -1) {
+        ii_current = _clone_map.gen(n->_idx);
+        _ii_order.push(ii_current);
+        load_idx = _clone_map.idx(n->_idx);
+      } else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) {
+        ii_current = _clone_map.gen(n->_idx);
+        _ii_order.push(ii_current);
+      }
+    }
+  }//for
 
   // Ensure extra info is allocated.
   initialize_bb();
 
 #ifndef PRODUCT
+  if (_vector_loop_debug && _ii_order.length() > 0) {
+    tty->print("SuperWord::construct_bb: List of generations: ");
+    for (int jj = 0; jj < _ii_order.length(); ++jj) {
+      tty->print("  %d:%d", jj, _ii_order.at(jj));
+    }
+    tty->print_cr(" ");
+  }
   if (TraceSuperWord) {
     print_bb();
     tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
@@ -2312,18 +2358,27 @@
 //------------------------------memory_alignment---------------------------
 // Alignment within a vector memory reference
 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
+  #ifndef PRODUCT
+    if(TraceSuperWord && Verbose) {
+      tty->print("SuperWord::memory_alignment within a vector memory reference for %d:  ", s->_idx); s->dump();
+    }
+  #endif
+  NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);)
   SWPointer p(s, this, NULL, false);
   if (!p.valid()) {
+    NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
     return bottom_align;
   }
   int vw = vector_width_in_bytes(s);
   if (vw < 2) {
+    NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
     return bottom_align; // No vectors for this type
   }
   int offset  = p.offset_in_bytes();
   offset     += iv_adjust*p.memory_size();
   int off_rem = offset % vw;
   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
+  NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);)
   return off_mod;
 }
 
@@ -2732,13 +2787,20 @@
 
 
 //==============================SWPointer===========================
-
+#ifndef PRODUCT
+int SWPointer::Tracer::_depth = 0;
+#endif
 //----------------------------SWPointer------------------------
 SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
   _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
   _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
   _nstack(nstack), _analyze_only(analyze_only),
-  _stack_idx(0) {
+  _stack_idx(0)
+#ifndef PRODUCT
+  , _tracer(slp)
+#endif
+{
+  NOT_PRODUCT(_tracer.ctor_1(mem);)
 
   Node* adr = mem->in(MemNode::Address);
   if (!adr->is_AddP()) {
@@ -2757,16 +2819,29 @@
     assert(!valid(), "unsafe access");
     return;
   }
-  for (int i = 0; i < 3; i++) {
+
+  NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();)
+  NOT_PRODUCT(_tracer.ctor_2(adr);)
+
+  int i;
+  for (i = 0; i < 3; i++) {
+    NOT_PRODUCT(_tracer.ctor_3(adr, i);)
+
     if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
       assert(!valid(), "too complex");
       return;
     }
     adr = adr->in(AddPNode::Address);
+    NOT_PRODUCT(_tracer.ctor_4(adr, i);)
+
     if (base == adr || !adr->is_AddP()) {
+      NOT_PRODUCT(_tracer.ctor_5(adr, base, i);)
       break; // stop looking at addp's
     }
   }
+  NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)
+  NOT_PRODUCT(_tracer.ctor_6(mem);)
+
   _base = base;
   _adr  = adr;
   assert(valid(), "Usable");
@@ -2778,68 +2853,103 @@
   _mem(p->_mem), _slp(p->_slp),  _base(NULL),  _adr(NULL),
   _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
   _nstack(p->_nstack), _analyze_only(p->_analyze_only),
-  _stack_idx(p->_stack_idx) {}
-
+  _stack_idx(p->_stack_idx)
+  #ifndef PRODUCT
+  , _tracer(p->_slp)
+  #endif
+{}
+
+
+bool SWPointer::invariant(Node* n) {
+  NOT_PRODUCT(Tracer::Depth dd;)
+  Node *n_c = phase()->get_ctrl(n);
+  NOT_PRODUCT(_tracer.invariant_1(n, n_c);)
+  return !lpt()->is_member(phase()->get_loop(n_c));
+}
 //------------------------scaled_iv_plus_offset--------------------
 // Match: k*iv + offset
 // where: k is a constant that maybe zero, and
 //        offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
 bool SWPointer::scaled_iv_plus_offset(Node* n) {
+  NOT_PRODUCT(Tracer::Depth ddd;)
+  NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);)
+
   if (scaled_iv(n)) {
+    NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);)
     return true;
   }
+
   if (offset_plus_k(n)) {
+    NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);)
     return true;
   }
+
   int opc = n->Opcode();
   if (opc == Op_AddI) {
     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
+      NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);)
       return true;
     }
     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
+      NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);)
       return true;
     }
   } else if (opc == Op_SubI) {
     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) {
+      NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);)
       return true;
     }
     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
       _scale *= -1;
+      NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);)
       return true;
     }
   }
+
+  NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);)
   return false;
 }
 
 //----------------------------scaled_iv------------------------
 // Match: k*iv where k is a constant that's not zero
 bool SWPointer::scaled_iv(Node* n) {
-  if (_scale != 0) {
-    return false;  // already found a scale
+  NOT_PRODUCT(Tracer::Depth ddd;)
+  NOT_PRODUCT(_tracer.scaled_iv_1(n);)
+
+  if (_scale != 0) { // already found a scale
+    NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);)
+    return false;
   }
+
   if (n == iv()) {
     _scale = 1;
+    NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);)
     return true;
   }
   if (_analyze_only && (invariant(n) == false)) {
     _nstack->push(n, _stack_idx++);
   }
+
   int opc = n->Opcode();
   if (opc == Op_MulI) {
     if (n->in(1) == iv() && n->in(2)->is_Con()) {
       _scale = n->in(2)->get_int();
+      NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);)
       return true;
     } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
       _scale = n->in(1)->get_int();
+      NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);)
       return true;
     }
   } else if (opc == Op_LShiftI) {
     if (n->in(1) == iv() && n->in(2)->is_Con()) {
       _scale = 1 << n->in(2)->get_int();
+      NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)
       return true;
     }
   } else if (opc == Op_ConvI2L) {
     if (scaled_iv_plus_offset(n->in(1))) {
+      NOT_PRODUCT(_tracer.scaled_iv_7(n);)
       return true;
     }
   } else if (opc == Op_LShiftL) {
@@ -2847,17 +2957,22 @@
       // Need to preserve the current _offset value, so
       // create a temporary object for this expression subtree.
       // Hacky, so should re-engineer the address pattern match.
+      NOT_PRODUCT(Tracer::Depth dddd;)
       SWPointer tmp(this);
+      NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)
+
       if (tmp.scaled_iv_plus_offset(n->in(1))) {
-        if (tmp._invar == NULL) {
+        if (tmp._invar == NULL || _slp->do_vector_loop()) {
           int mult = 1 << n->in(2)->get_int();
           _scale   = tmp._scale  * mult;
           _offset += tmp._offset * mult;
+          NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, mult);)
           return true;
         }
       }
     }
   }
+  NOT_PRODUCT(_tracer.scaled_iv_10(n);)
   return false;
 }
 
@@ -2865,9 +2980,13 @@
 // Match: offset is (k [+/- invariant])
 // where k maybe zero and invariant is optional, but not both.
 bool SWPointer::offset_plus_k(Node* n, bool negate) {
+  NOT_PRODUCT(Tracer::Depth ddd;)
+  NOT_PRODUCT(_tracer.offset_plus_k_1(n);)
+
   int opc = n->Opcode();
   if (opc == Op_ConI) {
     _offset += negate ? -(n->get_int()) : n->get_int();
+    NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)
     return true;
   } else if (opc == Op_ConL) {
     // Okay if value fits into an int
@@ -2876,11 +2995,17 @@
       jlong loff = n->get_long();
       jint  off  = (jint)loff;
       _offset += negate ? -off : loff;
+      NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)
       return true;
     }
+    NOT_PRODUCT(_tracer.offset_plus_k_4(n);)
     return false;
   }
-  if (_invar != NULL) return false; // already have an invariant
+  if (_invar != NULL) { // already has an invariant
+    NOT_PRODUCT(_tracer.offset_plus_k_5(n, _invar);)
+    return false;
+  }
+
   if (_analyze_only && (invariant(n) == false)) {
     _nstack->push(n, _stack_idx++);
   }
@@ -2889,11 +3014,13 @@
       _negate_invar = negate;
       _invar = n->in(1);
       _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+      NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);)
       return true;
     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
       _negate_invar = negate;
       _invar = n->in(2);
+      NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);)
       return true;
     }
   }
@@ -2902,19 +3029,24 @@
       _negate_invar = negate;
       _invar = n->in(1);
       _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+      NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);)
       return true;
     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
       _negate_invar = !negate;
       _invar = n->in(2);
+      NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);)
       return true;
     }
   }
   if (invariant(n)) {
     _negate_invar = negate;
     _invar = n;
+    NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);)
     return true;
   }
+
+  NOT_PRODUCT(_tracer.offset_plus_k_11(n);)
   return false;
 }
 
@@ -2930,6 +3062,287 @@
 #endif
 }
 
+//----------------------------tracing------------------------
+#ifndef PRODUCT
+void SWPointer::Tracer::print_depth() {
+  for (int ii = 0; ii<_depth; ++ii) tty->print("  ");
+}
+
+void SWPointer::Tracer::ctor_1 (Node* mem) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx); mem->dump();
+  }
+}
+
+void SWPointer::Tracer::ctor_2(Node* adr) {
+  if(_slp->is_trace_alignment()) {
+    //store_depth();
+    inc_depth();
+    print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump();
+    inc_depth();
+    print_depth(); tty->print(" %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump();
+  }
+}
+
+void SWPointer::Tracer::ctor_3(Node* adr, int i) {
+  if(_slp->is_trace_alignment()) {
+    inc_depth();
+    Node* offset = adr->in(AddPNode::Offset);
+    print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump();
+  }
+}
+
+void SWPointer::Tracer::ctor_4(Node* adr, int i) {
+  if(_slp->is_trace_alignment()) {
+    inc_depth();
+    print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i); adr->dump();
+  }
+}
+
+void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) {
+  if(_slp->is_trace_alignment()) {
+    inc_depth();
+    if (base == adr) {
+      print_depth(); tty->print_cr("  \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i);
+    } else if (!adr->is_AddP()) {
+      print_depth(); tty->print_cr("  \\ %d (adr) is NOT Addp SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i);
+    }
+  }
+}
+
+void SWPointer::Tracer::ctor_6(Node* mem) {
+  if(_slp->is_trace_alignment()) {
+    //restore_depth();
+    print_depth(); tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx);
+  }
+}
+
+void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) {
+  if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) {
+    int is_member =  _slp->_lpt->is_member(_slp->_phase->get_loop(n_c));
+    int in_bb     =  _slp->in_bb(n);
+    print_depth(); tty->print("  \\ ");  tty->print_cr(" %d SWPointer::invariant  conditions differ: n_c %d", n->_idx, n_c->_idx);
+    print_depth(); tty->print("  \\ ");  tty->print_cr("is_member %d, in_bb %d", is_member, in_bb);
+    print_depth(); tty->print("  \\ ");  n->dump();
+    print_depth(); tty->print("  \\ ");  n_c->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx);
+    n->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx);
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx);
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx);
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx);
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx);
+    print_depth(); tty->print("  \\  %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx);
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: FAILED", n->_idx);
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_1(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx);
+    print_depth(); tty->print_cr("  \\ SWPointer::scaled_iv: _scale (%d) != 0", scale);
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale);
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale);
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale);
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale);
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_7(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx);
+    print_depth(); tty->print_cr("  \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx);
+    inc_depth(); inc_depth();
+    print_depth(); n->in(1)->dump();
+    dec_depth(); dec_depth();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp->print();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int _offset, int mult) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, _offset);
+    print_depth(); tty->print_cr("  \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset, in(2) %d used to get mult = %d: _scale = %d, _offset = %d",
+    n->in(1)->_idx, n->in(2)->_idx, mult, scale, _offset);
+    inc_depth(); inc_depth();
+    print_depth(); n->in(1)->dump();
+    print_depth(); n->in(2)->dump();
+    dec_depth(); dec_depth();
+  }
+}
+
+void SWPointer::Tracer::scaled_iv_10(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx);
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_1(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump();
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset);
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset);
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_4(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx);
+    print_depth(); tty->print_cr("  \\ " JLONG_FORMAT " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long());
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx);
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx); _invar->dump();
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
+    n->_idx, _negate_invar, _invar->_idx, _offset);
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump();
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
+    n->_idx, _negate_invar, _invar->_idx, _offset);
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump();
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
+    n->_idx, _negate_invar, _invar->_idx, _offset);
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump();
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
+    print_depth(); tty->print("  \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump();
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
+    print_depth(); tty->print_cr("  \\ %d SWPointer::offset_plus_k: is invariant", n->_idx);
+  }
+}
+
+void SWPointer::Tracer::offset_plus_k_11(Node* n) {
+  if(_slp->is_trace_alignment()) {
+    print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx);
+  }
+}
+
+#endif
 // ========================= OrderedPair =====================
 
 const OrderedPair OrderedPair::initial;
@@ -3076,13 +3489,20 @@
 //
 // --------------------------------- vectorization/simd -----------------------------------
 //
+bool SuperWord::same_origin_idx(Node* a, Node* b) const {
+  return a != NULL && b != NULL && _clone_map.same_idx(a->_idx, b->_idx);
+}
+bool SuperWord::same_generation(Node* a, Node* b) const {
+  return a != NULL && b != NULL && _clone_map.same_gen(a->_idx, b->_idx);
+}
+
 Node*  SuperWord::find_phi_for_mem_dep(LoadNode* ld) {
   assert(in_bb(ld), "must be in block");
   if (_clone_map.gen(ld->_idx) == _ii_first) {
 #ifndef PRODUCT
     if (_vector_loop_debug) {
       tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(ld->_idx)=%d",
-                    _clone_map.gen(ld->_idx));
+        _clone_map.gen(ld->_idx));
     }
 #endif
     return NULL; //we think that any ld in the first gen being vectorizable
@@ -3094,18 +3514,18 @@
 #ifndef PRODUCT
     if (_vector_loop_debug) {
       tty->print_cr("SuperWord::find_phi_for_mem_dep input node %d to load %d has no other outputs and edge mem->load cannot be removed",
-                    mem->_idx, ld->_idx);
+        mem->_idx, ld->_idx);
       ld->dump();
       mem->dump();
     }
 #endif
     return NULL;
   }
-  if (!in_bb(mem) || _clone_map.gen(mem->_idx) == _clone_map.gen(ld->_idx)) {
+  if (!in_bb(mem) || same_generation(mem, ld)) {
 #ifndef PRODUCT
     if (_vector_loop_debug) {
       tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d",
-                    _clone_map.gen(mem->_idx));
+        _clone_map.gen(mem->_idx));
     }
 #endif
     return NULL; // does not depend on loop volatile node or depends on the same generation
@@ -3136,7 +3556,7 @@
 #ifndef PRODUCT
     if (_vector_loop_debug) {
       tty->print_cr("SuperWord::find_phi_for_mem_dep load %d is not vectorizable node, its phi %d is not _mem_slice_head",
-                    ld->_idx, phi->_idx);
+        ld->_idx, phi->_idx);
       ld->dump();
       phi->dump();
     }
@@ -3151,11 +3571,11 @@
 Node* SuperWord::first_node(Node* nd) {
   for (int ii = 0; ii < _iteration_first.length(); ii++) {
     Node* nnn = _iteration_first.at(ii);
-    if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) {
+    if (same_origin_idx(nnn, nd)) {
 #ifndef PRODUCT
       if (_vector_loop_debug) {
         tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)",
-                      nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx));
+          nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx));
       }
 #endif
       return nnn;
@@ -3165,7 +3585,7 @@
 #ifndef PRODUCT
   if (_vector_loop_debug) {
     tty->print_cr("SuperWord::first_node: did not find first iteration node for %d (_clone_map.idx(nd->_idx)=%d)",
-                  nd->_idx, _clone_map.idx(nd->_idx));
+      nd->_idx, _clone_map.idx(nd->_idx));
   }
 #endif
   return 0;
@@ -3174,11 +3594,11 @@
 Node* SuperWord::last_node(Node* nd) {
   for (int ii = 0; ii < _iteration_last.length(); ii++) {
     Node* nnn = _iteration_last.at(ii);
-    if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) {
+    if (same_origin_idx(nnn, nd)) {
 #ifndef PRODUCT
       if (_vector_loop_debug) {
         tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d",
-                      _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx));
+          _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx));
       }
 #endif
       return nnn;
@@ -3219,9 +3639,11 @@
         } else if (_ii_first != _clone_map.gen(ii->_idx)) {
 #ifndef PRODUCT
           if (TraceSuperWord && Verbose) {
-            tty->print_cr("SuperWord::mark_generations _ii_first error - found different generations in two nodes ");
+            tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first);
             ii->dump();
-            ii_err->dump();
+            if (ii_err!= 0) {
+              ii_err->dump();
+            }
           }
 #endif
           return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized
@@ -3252,8 +3674,7 @@
   }
 
   // building order of iterations
-  assert(_ii_order.length() == 0, "should be empty");
-  if (ii_err != 0) {
+  if (_ii_order.length() == 0 && ii_err != 0) {
     assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb");
     Node* nd = ii_err;
     while(_clone_map.gen(nd->_idx) != _ii_last) {
@@ -3261,7 +3682,7 @@
       bool found = false;
       for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) {
         Node* use = nd->fast_out(i);
-        if (_clone_map.idx(use->_idx) == _clone_map.idx(nd->_idx) && use->as_Store()->in(MemNode::Memory) == nd) {
+        if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) {
           found = true;
           nd = use;
           break;
@@ -3303,7 +3724,7 @@
 
 bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) {
   assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes");
-  assert(_clone_map.idx(gold->_idx) == _clone_map.idx(fix->_idx), "should be clones of the same node");
+  assert(same_origin_idx(gold, fix), "should be clones of the same node");
   Node* gin1 = gold->in(1);
   Node* gin2 = gold->in(2);
   Node* fin1 = fix->in(1);
@@ -3311,12 +3732,12 @@
   bool swapped = false;
 
   if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin1)) {
-    if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin1->_idx) &&
-        _clone_map.idx(gin2->_idx) == _clone_map.idx(fin2->_idx)) {
+    if (same_origin_idx(gin1, fin1) &&
+        same_origin_idx(gin2, fin2)) {
       return true; // nothing to fix
     }
-    if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin2->_idx) &&
-        _clone_map.idx(gin2->_idx) == _clone_map.idx(fin1->_idx)) {
+    if (same_origin_idx(gin1, fin2) &&
+        same_origin_idx(gin2, fin1)) {
       fix->swap_edges(1, 2);
       swapped = true;
     }
@@ -3364,7 +3785,7 @@
       for (int gen = 1; gen < _ii_order.length(); ++gen) {
         for (int kk = 0; kk < _block.length(); kk++) {
           Node* clone = _block.at(kk);
-          if (_clone_map.idx(clone->_idx) == _clone_map.idx(nd->_idx) &&
+          if (same_origin_idx(clone, nd) &&
               _clone_map.gen(clone->_idx) == _ii_order.at(gen)) {
             if (nd->is_Add() || nd->is_Mul()) {
               fix_commutative_inputs(nd, clone);
@@ -3429,13 +3850,12 @@
       if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) {
         for (int i = 0; i < _block.length(); i++) {
           Node* ld2 = _block.at(i);
-          if (ld2->is_Load() &&
-              _clone_map.idx(ld->_idx) == _clone_map.idx(ld2->_idx) &&
-              _clone_map.gen(ld->_idx) != _clone_map.gen(ld2->_idx)) { // <= do not collect the first generation ld
+          if (ld2->is_Load() && same_origin_idx(ld, ld2) &&
+              !same_generation(ld, ld2)) { // <= do not collect the first generation ld
 #ifndef PRODUCT
             if (_vector_loop_debug) {
               tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)",
-                            ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx);
+                ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx);
             }
 #endif
             // could not do on-the-fly, since iterator is immutable
@@ -3453,7 +3873,7 @@
 #ifndef PRODUCT
       if (_vector_loop_debug) {
         tty->print_cr("SuperWord::hoist_loads_in_graph replacing MemNode::Memory(%d) edge in %d with one from %d",
-                      MemNode::Memory, ld->_idx, phi->_idx);
+          MemNode::Memory, ld->_idx, phi->_idx);
       }
 #endif
       _igvn.replace_input_of(ld, MemNode::Memory, phi);
--- a/hotspot/src/share/vm/opto/superword.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/superword.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -203,6 +203,7 @@
 // -----------------------------SuperWord---------------------------------
 // Transforms scalar operations into packed (superword) operations.
 class SuperWord : public ResourceObj {
+ friend class SWPointer;
  private:
   PhaseIdealLoop* _phase;
   Arena*          _arena;
@@ -247,8 +248,17 @@
   PhaseIdealLoop* phase()          { return _phase; }
   IdealLoopTree* lpt()             { return _lpt; }
   PhiNode* iv()                    { return _iv; }
+
   bool early_return()              { return _early_return; }
 
+#ifndef PRODUCT
+  bool     is_debug()              { return _vector_loop_debug > 0; }
+  bool     is_trace_alignment()    { return (_vector_loop_debug & 2) > 0; }
+  bool     is_trace_mem_slice()    { return (_vector_loop_debug & 4) > 0; }
+  bool     is_trace_loop()         { return (_vector_loop_debug & 8) > 0; }
+  bool     is_trace_adjacent()     { return (_vector_loop_debug & 16) > 0; }
+#endif
+  bool     do_vector_loop()        { return _do_vector_loop; }
  private:
   IdealLoopTree* _lpt;             // Current loop tree node
   LoopNode*      _lp;              // Current LoopNode
@@ -257,12 +267,14 @@
   bool           _race_possible;   // In cases where SDMU is true
   bool           _early_return;    // True if we do not initialize
   bool           _do_vector_loop;  // whether to do vectorization/simd style
-  bool           _vector_loop_debug; // provide more printing in debug mode
   int            _num_work_vecs;   // Number of non memory vector operations
   int            _num_reductions;  // Number of reduction expressions applied
   int            _ii_first;        // generation with direct deps from mem phi
   int            _ii_last;         // generation with direct deps to   mem phi
   GrowableArray<int> _ii_order;
+#ifndef PRODUCT
+  uintx          _vector_loop_debug; // provide more printing in debug mode
+#endif
 
   // Accessors
   Arena* arena()                   { return _arena; }
@@ -325,12 +337,20 @@
   Node_List* my_pack(Node* n)                { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
   void set_my_pack(Node* n, Node_List* p)    { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
 
+  // CloneMap utilities
+  bool same_origin_idx(Node* a, Node* b) const;
+  bool same_generation(Node* a, Node* b) const;
+
   // methods
 
   // Extract the superword level parallelism
   void SLP_extract();
   // Find the adjacent memory references and create pack pairs for them.
   void find_adjacent_refs();
+  // Tracing support
+  #ifndef PRODUCT
+  void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment);
+  #endif
   // Find a memory reference to align the loop induction variable to.
   MemNode* find_align_to_ref(Node_List &memops);
   // Calculate loop's iv adjustment for this memory ops.
@@ -340,13 +360,13 @@
   // rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only)
   bool hoist_loads_in_graph();
   // Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi
-  // Return false if failed.
+  // Return false if failed
   Node* find_phi_for_mem_dep(LoadNode* ld);
   // Return same node but from the first generation. Return 0, if not found
   Node* first_node(Node* nd);
   // Return same node as this but from the last generation. Return 0, if not found
   Node* last_node(Node* n);
-  // Mark nodes belonging to first and last generation,
+  // Mark nodes belonging to first and last generation
   // returns first generation index or -1 if vectorization/simd is impossible
   int mark_generations();
   // swapping inputs of commutative instruction (Add or Mul)
@@ -483,10 +503,7 @@
   IdealLoopTree*  lpt()   { return _slp->lpt(); }
   PhiNode*        iv()    { return _slp->iv();  } // Induction var
 
-  bool invariant(Node* n) {
-    Node *n_c = phase()->get_ctrl(n);
-    return !lpt()->is_member(phase()->get_loop(n_c));
-  }
+  bool invariant(Node* n);
 
   // Match: k*iv + offset
   bool scaled_iv_plus_offset(Node* n);
@@ -545,6 +562,76 @@
   static bool comparable(int cmp) { return cmp < NotComparable; }
 
   void print();
+
+#ifndef PRODUCT
+  class Tracer {
+    friend class SuperWord;
+    friend class SWPointer;
+    SuperWord*   _slp;
+    static int   _depth;
+    int _depth_save;
+    void print_depth();
+    int  depth() const    { return _depth; }
+    void set_depth(int d) { _depth = d; }
+    void inc_depth()      { _depth++;}
+    void dec_depth()      { if (_depth > 0) _depth--;}
+    void store_depth()    {_depth_save = _depth;}
+    void restore_depth()  {_depth = _depth_save;}
+
+    class Depth {
+      friend class Tracer;
+      friend class SWPointer;
+      friend class SuperWord;
+      Depth()  { ++_depth; }
+      Depth(int x)  { _depth = 0; }
+      ~Depth() { if (_depth > 0) --_depth;}
+    };
+    Tracer (SuperWord* slp) : _slp(slp) {}
+
+    // tracing functions
+    void ctor_1(Node* mem);
+    void ctor_2(Node* adr);
+    void ctor_3(Node* adr, int i);
+    void ctor_4(Node* adr, int i);
+    void ctor_5(Node* adr, Node* base,  int i);
+    void ctor_6(Node* mem);
+
+    void invariant_1(Node *n, Node *n_c);
+
+    void scaled_iv_plus_offset_1(Node* n);
+    void scaled_iv_plus_offset_2(Node* n);
+    void scaled_iv_plus_offset_3(Node* n);
+    void scaled_iv_plus_offset_4(Node* n);
+    void scaled_iv_plus_offset_5(Node* n);
+    void scaled_iv_plus_offset_6(Node* n);
+    void scaled_iv_plus_offset_7(Node* n);
+    void scaled_iv_plus_offset_8(Node* n);
+
+    void scaled_iv_1(Node* n);
+    void scaled_iv_2(Node* n, int scale);
+    void scaled_iv_3(Node* n, int scale);
+    void scaled_iv_4(Node* n, int scale);
+    void scaled_iv_5(Node* n, int scale);
+    void scaled_iv_6(Node* n, int scale);
+    void scaled_iv_7(Node* n);
+    void scaled_iv_8(Node* n, SWPointer* tmp);
+    void scaled_iv_9(Node* n, int _scale, int _offset, int mult);
+    void scaled_iv_10(Node* n);
+
+    void offset_plus_k_1(Node* n);
+    void offset_plus_k_2(Node* n, int _offset);
+    void offset_plus_k_3(Node* n, int _offset);
+    void offset_plus_k_4(Node* n);
+    void offset_plus_k_5(Node* n, Node* _invar);
+    void offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_11(Node* n);
+
+  } _tracer;//TRacer;
+#endif
 };
 
 
--- a/hotspot/src/share/vm/opto/type.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/type.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -200,8 +200,7 @@
 
 
 //-----------------------make_from_constant------------------------------------
-const Type* Type::make_from_constant(ciConstant constant,
-                                     bool require_constant, bool is_autobox_cache) {
+const Type* Type::make_from_constant(ciConstant constant, bool require_constant) {
   switch (constant.basic_type()) {
   case T_BOOLEAN:  return TypeInt::make(constant.as_boolean());
   case T_CHAR:     return TypeInt::make(constant.as_char());
@@ -222,14 +221,57 @@
       if (oop_constant->is_null_object()) {
         return Type::get_zero_type(T_OBJECT);
       } else if (require_constant || oop_constant->should_be_constant()) {
-        return TypeOopPtr::make_from_constant(oop_constant, require_constant, is_autobox_cache);
+        return TypeOopPtr::make_from_constant(oop_constant, require_constant);
+      }
+    }
+  case T_ILLEGAL:
+    // Invalid ciConstant returned due to OutOfMemoryError in the CI
+    assert(Compile::current()->env()->failing(), "otherwise should not see this");
+    return NULL;
+  }
+  // Fall through to failure
+  return NULL;
+}
+
+
+const Type* Type::make_constant(ciField* field, Node* obj) {
+  if (!field->is_constant())  return NULL;
+
+  const Type* con_type = NULL;
+  if (field->is_static()) {
+    // final static field
+    con_type = Type::make_from_constant(field->constant_value(), /*require_const=*/true);
+    if (Compile::current()->eliminate_boxing() && field->is_autobox_cache() && con_type != NULL) {
+      con_type = con_type->is_aryptr()->cast_to_autobox_cache(true);
+    }
+  } else {
+    // final or stable non-static field
+    // Treat final non-static fields of trusted classes (classes in
+    // java.lang.invoke and sun.invoke packages and subpackages) as
+    // compile time constants.
+    if (obj->is_Con()) {
+      const TypeOopPtr* oop_ptr = obj->bottom_type()->isa_oopptr();
+      ciObject* constant_oop = oop_ptr->const_oop();
+      ciConstant constant = field->constant_value_of(constant_oop);
+      con_type = Type::make_from_constant(constant, /*require_const=*/true);
+    }
+  }
+  if (FoldStableValues && field->is_stable() && con_type != NULL) {
+    if (con_type->is_zero_type()) {
+      return NULL; // the field hasn't been initialized yet
+    } else if (con_type->isa_oopptr()) {
+      const Type* stable_type = Type::get_const_type(field->type());
+      if (field->type()->is_array_klass()) {
+        int stable_dimension = field->type()->as_array_klass()->dimension();
+        stable_type = stable_type->is_aryptr()->cast_to_stable(true, stable_dimension);
+      }
+      if (stable_type != NULL) {
+        con_type = con_type->join_speculative(stable_type);
       }
     }
   }
-  // Fall through to failure
-  return NULL;
-}
-
+  return con_type;
+}
 
 //------------------------------make-------------------------------------------
 // Create a simple Type, with default empty symbol sets.  Then hashcons it
@@ -3009,9 +3051,7 @@
 
 //------------------------------make_from_constant-----------------------------
 // Make a java pointer from an oop constant
-const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o,
-                                                 bool require_constant,
-                                                 bool is_autobox_cache) {
+const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o, bool require_constant) {
   assert(!o->is_null_object(), "null object not yet handled here.");
   ciKlass* klass = o->klass();
   if (klass->is_instance_klass()) {
@@ -3026,10 +3066,6 @@
     // Element is an object array. Recursively call ourself.
     const TypeOopPtr *etype =
       TypeOopPtr::make_from_klass_raw(klass->as_obj_array_klass()->element_klass());
-    if (is_autobox_cache) {
-      // The pointers in the autobox arrays are always non-null.
-      etype = etype->cast_to_ptr_type(TypePtr::NotNull)->is_oopptr();
-    }
     const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
     // We used to pass NotNull in here, asserting that the sub-arrays
     // are all not-null.  This is not true in generally, as code can
@@ -3039,7 +3075,7 @@
     } else if (!o->should_be_constant()) {
       return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
     }
-    const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, InlineDepthBottom, is_autobox_cache);
+    const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
     return arr;
   } else if (klass->is_type_array_klass()) {
     // Element is an typeArray
@@ -3940,7 +3976,6 @@
   return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth);
 }
 
-
 //------------------------------cast_to_stable---------------------------------
 const TypeAryPtr* TypeAryPtr::cast_to_stable(bool stable, int stable_dimension) const {
   if (stable_dimension <= 0 || (stable_dimension == 1 && stable == this->is_stable()))
@@ -3969,6 +4004,18 @@
   return dim;
 }
 
+//----------------------cast_to_autobox_cache-----------------------------------
+const TypeAryPtr* TypeAryPtr::cast_to_autobox_cache(bool cache) const {
+  if (is_autobox_cache() == cache)  return this;
+  const TypeOopPtr* etype = elem()->make_oopptr();
+  if (etype == NULL)  return this;
+  // The pointers in the autobox arrays are always non-null.
+  TypePtr::PTR ptr_type = cache ? TypePtr::NotNull : TypePtr::AnyNull;
+  etype = etype->cast_to_ptr_type(TypePtr::NotNull)->is_oopptr();
+  const TypeAry* new_ary = TypeAry::make(etype, size(), is_stable());
+  return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth, cache);
+}
+
 //------------------------------eq---------------------------------------------
 // Structural equality check for Type representations
 bool TypeAryPtr::eq( const Type *t ) const {
@@ -4455,7 +4502,7 @@
 // TRUE if Type is a singleton type, FALSE otherwise.   Singletons are simple
 // constants
 bool TypeMetadataPtr::singleton(void) const {
-  // detune optimizer to not generate constant metadta + constant offset as a constant!
+  // detune optimizer to not generate constant metadata + constant offset as a constant!
   // TopPTR, Null, AnyNull, Constant are all singletons
   return (_offset == 0) && !below_centerline(_ptr);
 }
--- a/hotspot/src/share/vm/opto/type.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/opto/type.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -412,8 +412,9 @@
   static const Type* get_typeflow_type(ciType* type);
 
   static const Type* make_from_constant(ciConstant constant,
-                                        bool require_constant = false,
-                                        bool is_autobox_cache = false);
+                                        bool require_constant = false);
+
+  static const Type* make_constant(ciField* field, Node* obj);
 
   // Speculative type helper methods. See TypePtr.
   virtual const TypePtr* speculative() const                                  { return NULL; }
@@ -973,8 +974,7 @@
   // may return a non-singleton type.
   // If require_constant, produce a NULL if a singleton is not possible.
   static const TypeOopPtr* make_from_constant(ciObject* o,
-                                              bool require_constant = false,
-                                              bool not_null_elements = false);
+                                              bool require_constant = false);
 
   // Make a generic (unclassed) pointer to an oop.
   static const TypeOopPtr* make(PTR ptr, int offset, int instance_id,
@@ -1184,6 +1184,8 @@
   const TypeAryPtr* cast_to_stable(bool stable, int stable_dimension = 1) const;
   int stable_dimension() const;
 
+  const TypeAryPtr* cast_to_autobox_cache(bool cache) const;
+
   // Convenience common pre-built types.
   static const TypeAryPtr *RANGE;
   static const TypeAryPtr *OOPS;
@@ -1674,12 +1676,12 @@
 
 inline const TypePtr* Type::make_ptr() const {
   return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype() :
-    ((_base == NarrowKlass) ? is_narrowklass()->get_ptrtype() :
-     (isa_ptr() ? is_ptr() : NULL));
+                              ((_base == NarrowKlass) ? is_narrowklass()->get_ptrtype() :
+                                                       isa_ptr());
 }
 
 inline const TypeOopPtr* Type::make_oopptr() const {
-  return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype()->is_oopptr() : is_oopptr();
+  return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype()->isa_oopptr() : isa_oopptr();
 }
 
 inline const TypeNarrowOop* Type::make_narrowoop() const {
@@ -1689,7 +1691,7 @@
 
 inline const TypeNarrowKlass* Type::make_narrowklass() const {
   return (_base == NarrowKlass) ? is_narrowklass() :
-                                (isa_ptr() ? TypeNarrowKlass::make(is_ptr()) : NULL);
+                                  (isa_ptr() ? TypeNarrowKlass::make(is_ptr()) : NULL);
 }
 
 inline bool Type::is_floatingpoint() const {
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -91,7 +91,7 @@
 int    Arguments::_sun_java_launcher_pid        = -1;
 bool   Arguments::_sun_java_launcher_is_altjvm  = false;
 
-// These parameters are reset in method parse_vm_init_args(JavaVMInitArgs*)
+// These parameters are reset in method parse_vm_init_args()
 bool   Arguments::_AlwaysCompileLoopMethods     = AlwaysCompileLoopMethods;
 bool   Arguments::_UseOnStackReplacement        = UseOnStackReplacement;
 bool   Arguments::_BackgroundCompilation        = BackgroundCompilation;
@@ -2251,7 +2251,9 @@
 
 // Parse JavaVMInitArgs structure
 
-jint Arguments::parse_vm_init_args(const JavaVMInitArgs* args) {
+jint Arguments::parse_vm_init_args(const JavaVMInitArgs *java_tool_options_args,
+                                   const JavaVMInitArgs *java_options_args,
+                                   const JavaVMInitArgs *cmd_line_args) {
   // For components of the system classpath.
   SysClassPath scp(Arguments::get_sysclasspath());
   bool scp_assembly_required = false;
@@ -2269,20 +2271,25 @@
   // Setup flags for mixed which is the default
   set_mode_flags(_mixed);
 
-  // Parse JAVA_TOOL_OPTIONS environment variable (if present)
-  jint result = parse_java_tool_options_environment_variable(&scp, &scp_assembly_required);
+  // Parse args structure generated from JAVA_TOOL_OPTIONS environment
+  // variable (if present).
+  jint result = parse_each_vm_init_arg(
+      java_tool_options_args, &scp, &scp_assembly_required, Flag::ENVIRON_VAR);
   if (result != JNI_OK) {
     return result;
   }
 
-  // Parse JavaVMInitArgs structure passed in
-  result = parse_each_vm_init_arg(args, &scp, &scp_assembly_required, Flag::COMMAND_LINE);
+  // Parse args structure generated from the command line flags.
+  result = parse_each_vm_init_arg(cmd_line_args, &scp, &scp_assembly_required,
+                                  Flag::COMMAND_LINE);
   if (result != JNI_OK) {
     return result;
   }
 
-  // Parse _JAVA_OPTIONS environment variable (if present) (mimics classic VM)
-  result = parse_java_options_environment_variable(&scp, &scp_assembly_required);
+  // Parse args structure generated from the _JAVA_OPTIONS environment
+  // variable (if present) (mimics classic VM)
+  result = parse_each_vm_init_arg(
+      java_options_args, &scp, &scp_assembly_required, Flag::ENVIRON_VAR);
   if (result != JNI_OK) {
     return result;
   }
@@ -3385,20 +3392,73 @@
   return JNI_OK;
 }
 
-jint Arguments::parse_java_options_environment_variable(SysClassPath* scp_p, bool* scp_assembly_required_p) {
-  return parse_options_environment_variable("_JAVA_OPTIONS", scp_p,
-                                            scp_assembly_required_p);
+// Helper class for controlling the lifetime of JavaVMInitArgs
+// objects.  The contents of the JavaVMInitArgs are guaranteed to be
+// deleted on the destruction of the ScopedVMInitArgs object.
+class ScopedVMInitArgs : public StackObj {
+ private:
+  JavaVMInitArgs _args;
+
+ public:
+  ScopedVMInitArgs() {
+    _args.version = JNI_VERSION_1_2;
+    _args.nOptions = 0;
+    _args.options = NULL;
+    _args.ignoreUnrecognized = false;
+  }
+
+  // Populates the JavaVMInitArgs object represented by this
+  // ScopedVMInitArgs object with the arguments in options.  The
+  // allocated memory is deleted by the destructor.  If this method
+  // returns anything other than JNI_OK, then this object is in a
+  // partially constructed state, and should be abandoned.
+  jint set_args(GrowableArray<JavaVMOption>* options) {
+    JavaVMOption* options_arr = NEW_C_HEAP_ARRAY_RETURN_NULL(
+        JavaVMOption, options->length(), mtInternal);
+    if (options_arr == NULL) {
+      return JNI_ENOMEM;
+    }
+    _args.options = options_arr;
+
+    for (int i = 0; i < options->length(); i++) {
+      options_arr[i] = options->at(i);
+      options_arr[i].optionString = os::strdup(options_arr[i].optionString);
+      if (options_arr[i].optionString == NULL) {
+        // Rely on the destructor to do cleanup.
+        _args.nOptions = i;
+        return JNI_ENOMEM;
+      }
+    }
+
+    _args.nOptions = options->length();
+    _args.ignoreUnrecognized = IgnoreUnrecognizedVMOptions;
+    return JNI_OK;
+  }
+
+  JavaVMInitArgs* get() { return &_args; }
+
+  ~ScopedVMInitArgs() {
+    if (_args.options == NULL) return;
+    for (int i = 0; i < _args.nOptions; i++) {
+      os::free(_args.options[i].optionString);
+    }
+    FREE_C_HEAP_ARRAY(JavaVMOption, _args.options);
+  }
+};
+
+jint Arguments::parse_java_options_environment_variable(ScopedVMInitArgs* args) {
+  return parse_options_environment_variable("_JAVA_OPTIONS", args);
 }
 
-jint Arguments::parse_java_tool_options_environment_variable(SysClassPath* scp_p, bool* scp_assembly_required_p) {
-  return parse_options_environment_variable("JAVA_TOOL_OPTIONS", scp_p,
-                                            scp_assembly_required_p);
+jint Arguments::parse_java_tool_options_environment_variable(ScopedVMInitArgs* args) {
+  return parse_options_environment_variable("JAVA_TOOL_OPTIONS", args);
 }
 
-jint Arguments::parse_options_environment_variable(const char* name, SysClassPath* scp_p, bool* scp_assembly_required_p) {
+jint Arguments::parse_options_environment_variable(const char* name,
+                                                   ScopedVMInitArgs* vm_args) {
   char *buffer = ::getenv(name);
 
-  // Don't check this variable if user has special privileges
+  // Don't check this environment variable if user has special privileges
   // (e.g. unix su command).
   if (buffer == NULL || os::have_special_privileges()) {
     return JNI_OK;
@@ -3443,48 +3503,20 @@
         *wrt++ = *rd++;                     // copy to option string
       }
     }
-    // Need to check if we're done before writing a NULL,
-    // because the write could be to the byte that rd is pointing to.
-    if (*rd++ == 0) {
-      *wrt = 0;
-      break;
+    if (*rd != 0) {
+      // In this case, the assignment to wrt below will make *rd nul,
+      // which will interfere with the next loop iteration.
+      rd++;
     }
     *wrt = 0;                               // Zero terminate option
   }
-  JavaVMOption* options_arr =
-      NEW_C_HEAP_ARRAY_RETURN_NULL(JavaVMOption, options->length(), mtInternal);
-  if (options_arr == NULL) {
-    delete options;
-    os::free(buffer);
-    return JNI_ENOMEM;
-  }
-  for (int i = 0; i < options->length(); i++) {
-    options_arr[i] = options->at(i);
-  }
-
-  // Construct JavaVMInitArgs structure and parse as if it was part of the command line
-  JavaVMInitArgs vm_args;
-  vm_args.version = JNI_VERSION_1_2;
-  vm_args.options = options_arr;
-  vm_args.nOptions = options->length();
-  vm_args.ignoreUnrecognized = IgnoreUnrecognizedVMOptions;
-
-  if (PrintVMOptions) {
-    const char* tail;
-    for (int i = 0; i < vm_args.nOptions; i++) {
-      const JavaVMOption *option = vm_args.options + i;
-      if (match_option(option, "-XX:", &tail)) {
-        logOption(tail);
-      }
-    }
-  }
-
-  jint result = parse_each_vm_init_arg(&vm_args, scp_p, scp_assembly_required_p,
-                                       Flag::ENVIRON_VAR);
-  FREE_C_HEAP_ARRAY(JavaVMOption, options_arr);
+
+  // Fill out JavaVMInitArgs structure.
+  jint status = vm_args->set_args(options);
+
   delete options;
   os::free(buffer);
-  return result;
+  return status;
 }
 
 void Arguments::set_shared_spaces_flags() {
@@ -3567,32 +3599,18 @@
 }
 #endif // PRODUCT
 
-// Parse entry point called from JNI_CreateJavaVM
-
-jint Arguments::parse(const JavaVMInitArgs* args) {
-
-  // Initialize ranges and constraints
-  CommandLineFlagRangeList::init();
-  CommandLineFlagConstraintList::init();
-
+static jint match_special_option_and_act(const JavaVMInitArgs* args,
+                                        char** flags_file) {
   // Remaining part of option string
   const char* tail;
 
-  // If flag "-XX:Flags=flags-file" is used it will be the first option to be processed.
-  const char* hotspotrc = ".hotspotrc";
-  bool settings_file_specified = false;
-  bool needs_hotspotrc_warning = false;
-
-  const char* flags_file;
-  int index;
-  for (index = 0; index < args->nOptions; index++) {
-    const JavaVMOption *option = args->options + index;
+  for (int index = 0; index < args->nOptions; index++) {
+    const JavaVMOption* option = args->options + index;
     if (ArgumentsExt::process_options(option)) {
       continue;
     }
     if (match_option(option, "-XX:Flags=", &tail)) {
-      flags_file = tail;
-      settings_file_specified = true;
+      *flags_file = (char *) tail;
       continue;
     }
     if (match_option(option, "-XX:+PrintVMOptions")) {
@@ -3646,10 +3664,69 @@
     }
 #endif
   }
+  return JNI_OK;
+}
+
+static void print_options(const JavaVMInitArgs *args) {
+  const char* tail;
+  for (int index = 0; index < args->nOptions; index++) {
+    const JavaVMOption *option = args->options + index;
+    if (match_option(option, "-XX:", &tail)) {
+      logOption(tail);
+    }
+  }
+}
+
+// Parse entry point called from JNI_CreateJavaVM
+
+jint Arguments::parse(const JavaVMInitArgs* args) {
+
+  // Initialize ranges and constraints
+  CommandLineFlagRangeList::init();
+  CommandLineFlagConstraintList::init();
+
+  // If flag "-XX:Flags=flags-file" is used it will be the first option to be processed.
+  const char* hotspotrc = ".hotspotrc";
+  char* flags_file = NULL;
+  bool settings_file_specified = false;
+  bool needs_hotspotrc_warning = false;
+  ScopedVMInitArgs java_tool_options_args;
+  ScopedVMInitArgs java_options_args;
+
+  jint code =
+      parse_java_tool_options_environment_variable(&java_tool_options_args);
+  if (code != JNI_OK) {
+    return code;
+  }
+
+  code = parse_java_options_environment_variable(&java_options_args);
+  if (code != JNI_OK) {
+    return code;
+  }
+
+  code =
+      match_special_option_and_act(java_tool_options_args.get(), &flags_file);
+  if (code != JNI_OK) {
+    return code;
+  }
+
+  code = match_special_option_and_act(args, &flags_file);
+  if (code != JNI_OK) {
+    return code;
+  }
+
+  code = match_special_option_and_act(java_options_args.get(), &flags_file);
+  if (code != JNI_OK) {
+    return code;
+  }
+
+  settings_file_specified = (flags_file != NULL);
 
   if (IgnoreUnrecognizedVMOptions) {
     // uncast const to modify the flag args->ignoreUnrecognized
     *(jboolean*)(&args->ignoreUnrecognized) = true;
+    java_tool_options_args.get()->ignoreUnrecognized = true;
+    java_options_args.get()->ignoreUnrecognized = true;
   }
 
   // Parse specified settings file
@@ -3672,16 +3749,15 @@
   }
 
   if (PrintVMOptions) {
-    for (index = 0; index < args->nOptions; index++) {
-      const JavaVMOption *option = args->options + index;
-      if (match_option(option, "-XX:", &tail)) {
-        logOption(tail);
-      }
-    }
+    print_options(java_tool_options_args.get());
+    print_options(args);
+    print_options(java_options_args.get());
   }
 
   // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS
-  jint result = parse_vm_init_args(args);
+  jint result = parse_vm_init_args(java_tool_options_args.get(),
+                                   java_options_args.get(), args);
+
   if (result != JNI_OK) {
     return result;
   }
--- a/hotspot/src/share/vm/runtime/arguments.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -220,6 +220,8 @@
   }
 };
 
+// Helper class for controlling the lifetime of JavaVMInitArgs objects.
+class ScopedVMInitArgs;
 
 class Arguments : AllStatic {
   friend class VMStructs;
@@ -374,10 +376,12 @@
   static bool process_argument(const char* arg, jboolean ignore_unrecognized, Flag::Flags origin);
   static void process_java_launcher_argument(const char*, void*);
   static void process_java_compiler_argument(char* arg);
-  static jint parse_options_environment_variable(const char* name, SysClassPath* scp_p, bool* scp_assembly_required_p);
-  static jint parse_java_tool_options_environment_variable(SysClassPath* scp_p, bool* scp_assembly_required_p);
-  static jint parse_java_options_environment_variable(SysClassPath* scp_p, bool* scp_assembly_required_p);
-  static jint parse_vm_init_args(const JavaVMInitArgs* args);
+  static jint parse_options_environment_variable(const char* name, ScopedVMInitArgs* vm_args);
+  static jint parse_java_tool_options_environment_variable(ScopedVMInitArgs* vm_args);
+  static jint parse_java_options_environment_variable(ScopedVMInitArgs* vm_args);
+  static jint parse_vm_init_args(const JavaVMInitArgs *java_tool_options_args,
+                                 const JavaVMInitArgs *java_options_args,
+                                 const JavaVMInitArgs *cmd_line_args);
   static jint parse_each_vm_init_arg(const JavaVMInitArgs* args, SysClassPath* scp_p, bool* scp_assembly_required_p, Flag::Flags origin);
   static jint finalize_vm_init_args(SysClassPath* scp_p, bool scp_assembly_required);
   static bool is_bad_option(const JavaVMOption* option, jboolean ignore, const char* option_type);
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1286,6 +1286,8 @@
                                                                             \
   experimental(intx, SyncVerbose, 0, "(Unstable)")                          \
                                                                             \
+  product(bool, InlineNotify, true, "intrinsify subset of notify" )         \
+                                                                            \
   experimental(intx, ClearFPUAtPark, 0, "(Unsafe, Unstable)")               \
                                                                             \
   experimental(intx, hashCode, 5,                                           \
@@ -4127,14 +4129,18 @@
              "Use the FP register for holding the frame pointer "           \
              "and not as a general purpose register.")                      \
                                                                             \
-  diagnostic(bool, CheckIntrinsics, trueInDebug,                            \
+  diagnostic(bool, CheckIntrinsics, true,                                   \
              "When a class C is loaded, check that "                        \
              "(1) all intrinsics defined by the VM for class C are present "\
              "in the loaded class file and are marked with the "            \
-             "@HotSpotIntrinsicCandidate annotation and also that "         \
+             "@HotSpotIntrinsicCandidate annotation, that "                 \
              "(2) there is an intrinsic registered for all loaded methods " \
              "that are annotated with the @HotSpotIntrinsicCandidate "      \
-             "annotation.")
+             "annotation, and that "                                        \
+             "(3) no orphan methods exist for class C (i.e., methods for "  \
+             "which the VM declares an intrinsic but that are not declared "\
+             "in the loaded class C. "                                      \
+             "Check (3) is available only in debug builds.")
 
 /*
  *  Macros for factoring of globals
--- a/hotspot/src/share/vm/runtime/objectMonitor.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/objectMonitor.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -430,6 +430,8 @@
   return -1;
 }
 
+#define MAX_RECHECK_INTERVAL 1000
+
 void NOINLINE ObjectMonitor::EnterI(TRAPS) {
   Thread * const Self = THREAD;
   assert(Self->is_Java_thread(), "invariant");
@@ -539,7 +541,7 @@
 
   TEVENT(Inflated enter - Contention);
   int nWakeups = 0;
-  int RecheckInterval = 1;
+  int recheckInterval = 1;
 
   for (;;) {
 
@@ -553,10 +555,12 @@
     // park self
     if (_Responsible == Self || (SyncFlags & 1)) {
       TEVENT(Inflated enter - park TIMED);
-      Self->_ParkEvent->park((jlong) RecheckInterval);
-      // Increase the RecheckInterval, but clamp the value.
-      RecheckInterval *= 8;
-      if (RecheckInterval > 1000) RecheckInterval = 1000;
+      Self->_ParkEvent->park((jlong) recheckInterval);
+      // Increase the recheckInterval, but clamp the value.
+      recheckInterval *= 8;
+      if (recheckInterval > MAX_RECHECK_INTERVAL) {
+        recheckInterval = MAX_RECHECK_INTERVAL;
+      }
     } else {
       TEVENT(Inflated enter - park UNTIMED);
       Self->_ParkEvent->park();
@@ -709,7 +713,7 @@
       // or java_suspend_self()
       jt->set_suspend_equivalent();
       if (SyncFlags & 1) {
-        Self->_ParkEvent->park((jlong)1000);
+        Self->_ParkEvent->park((jlong)MAX_RECHECK_INTERVAL);
       } else {
         Self->_ParkEvent->park();
       }
@@ -1636,15 +1640,8 @@
 // then instead of transferring a thread from the WaitSet to the EntryList
 // we might just dequeue a thread from the WaitSet and directly unpark() it.
 
-void ObjectMonitor::notify(TRAPS) {
-  CHECK_OWNER();
-  if (_WaitSet == NULL) {
-    TEVENT(Empty-Notify);
-    return;
-  }
-  DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
-
-  int Policy = Knob_MoveNotifyee;
+void ObjectMonitor::INotify(Thread * Self) {
+  const int policy = Knob_MoveNotifyee;
 
   Thread::SpinAcquire(&_WaitSetLock, "WaitSet - notify");
   ObjectWaiter * iterator = DequeueWaiter();
@@ -1652,74 +1649,76 @@
     TEVENT(Notify1 - Transfer);
     guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant");
     guarantee(iterator->_notified == 0, "invariant");
-    if (Policy != 4) {
+    // Disposition - what might we do with iterator ?
+    // a.  add it directly to the EntryList - either tail (policy == 1)
+    //     or head (policy == 0).
+    // b.  push it onto the front of the _cxq (policy == 2).
+    // For now we use (b).
+    if (policy != 4) {
       iterator->TState = ObjectWaiter::TS_ENTER;
     }
     iterator->_notified = 1;
-    Thread * Self = THREAD;
     iterator->_notifier_tid = Self->osthread()->thread_id();
 
-    ObjectWaiter * List = _EntryList;
-    if (List != NULL) {
-      assert(List->_prev == NULL, "invariant");
-      assert(List->TState == ObjectWaiter::TS_ENTER, "invariant");
-      assert(List != iterator, "invariant");
+    ObjectWaiter * list = _EntryList;
+    if (list != NULL) {
+      assert(list->_prev == NULL, "invariant");
+      assert(list->TState == ObjectWaiter::TS_ENTER, "invariant");
+      assert(list != iterator, "invariant");
     }
 
-    if (Policy == 0) {       // prepend to EntryList
-      if (List == NULL) {
+    if (policy == 0) {       // prepend to EntryList
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
-        List->_prev = iterator;
-        iterator->_next = List;
+        list->_prev = iterator;
+        iterator->_next = list;
         iterator->_prev = NULL;
         _EntryList = iterator;
       }
-    } else if (Policy == 1) {      // append to EntryList
-      if (List == NULL) {
+    } else if (policy == 1) {      // append to EntryList
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
         // CONSIDER:  finding the tail currently requires a linear-time walk of
         // the EntryList.  We can make tail access constant-time by converting to
         // a CDLL instead of using our current DLL.
-        ObjectWaiter * Tail;
-        for (Tail = List; Tail->_next != NULL; Tail = Tail->_next) /* empty */;
-        assert(Tail != NULL && Tail->_next == NULL, "invariant");
-        Tail->_next = iterator;
-        iterator->_prev = Tail;
+        ObjectWaiter * tail;
+        for (tail = list; tail->_next != NULL; tail = tail->_next) /* empty */;
+        assert(tail != NULL && tail->_next == NULL, "invariant");
+        tail->_next = iterator;
+        iterator->_prev = tail;
         iterator->_next = NULL;
       }
-    } else if (Policy == 2) {      // prepend to cxq
-      // prepend to cxq
-      if (List == NULL) {
+    } else if (policy == 2) {      // prepend to cxq
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
         iterator->TState = ObjectWaiter::TS_CXQ;
         for (;;) {
-          ObjectWaiter * Front = _cxq;
-          iterator->_next = Front;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
+          ObjectWaiter * front = _cxq;
+          iterator->_next = front;
+          if (Atomic::cmpxchg_ptr(iterator, &_cxq, front) == front) {
             break;
           }
         }
       }
-    } else if (Policy == 3) {      // append to cxq
+    } else if (policy == 3) {      // append to cxq
       iterator->TState = ObjectWaiter::TS_CXQ;
       for (;;) {
-        ObjectWaiter * Tail;
-        Tail = _cxq;
-        if (Tail == NULL) {
+        ObjectWaiter * tail = _cxq;
+        if (tail == NULL) {
           iterator->_next = NULL;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
+          if (Atomic::cmpxchg_ptr(iterator, &_cxq, NULL) == NULL) {
             break;
           }
         } else {
-          while (Tail->_next != NULL) Tail = Tail->_next;
-          Tail->_next = iterator;
-          iterator->_prev = Tail;
+          while (tail->_next != NULL) tail = tail->_next;
+          tail->_next = iterator;
+          iterator->_prev = tail;
           iterator->_next = NULL;
           break;
         }
@@ -1731,10 +1730,6 @@
       ev->unpark();
     }
 
-    if (Policy < 4) {
-      iterator->wait_reenter_begin(this);
-    }
-
     // _WaitSetLock protects the wait queue, not the EntryList.  We could
     // move the add-to-EntryList operation, above, outside the critical section
     // protected by _WaitSetLock.  In practice that's not useful.  With the
@@ -1742,133 +1737,63 @@
     // is the only thread that grabs _WaitSetLock.  There's almost no contention
     // on _WaitSetLock so it's not profitable to reduce the length of the
     // critical section.
+
+    if (policy < 4) {
+      iterator->wait_reenter_begin(this);
+    }
   }
-
   Thread::SpinRelease(&_WaitSetLock);
+}
 
-  if (iterator != NULL && ObjectMonitor::_sync_Notifications != NULL) {
-    ObjectMonitor::_sync_Notifications->inc();
+// Consider: a not-uncommon synchronization bug is to use notify() when
+// notifyAll() is more appropriate, potentially resulting in stranded
+// threads; this is one example of a lost wakeup. A useful diagnostic
+// option is to force all notify() operations to behave as notifyAll().
+//
+// Note: We can also detect many such problems with a "minimum wait".
+// When the "minimum wait" is set to a small non-zero timeout value
+// and the program does not hang whereas it did absent "minimum wait",
+// that suggests a lost wakeup bug. The '-XX:SyncFlags=1' option uses
+// a "minimum wait" for all park() operations; see the recheckInterval
+// variable and MAX_RECHECK_INTERVAL.
+
+void ObjectMonitor::notify(TRAPS) {
+  CHECK_OWNER();
+  if (_WaitSet == NULL) {
+    TEVENT(Empty-Notify);
+    return;
+  }
+  DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
+  INotify(THREAD);
+  if (ObjectMonitor::_sync_Notifications != NULL) {
+    ObjectMonitor::_sync_Notifications->inc(1);
   }
 }
 
 
+// The current implementation of notifyAll() transfers the waiters one-at-a-time
+// from the waitset to the EntryList. This could be done more efficiently with a
+// single bulk transfer but in practice it's not time-critical. Beware too,
+// that in prepend-mode we invert the order of the waiters. Let's say that the
+// waitset is "ABCD" and the EntryList is "XYZ". After a notifyAll() in prepend
+// mode the waitset will be empty and the EntryList will be "DCBAXYZ".
+
 void ObjectMonitor::notifyAll(TRAPS) {
   CHECK_OWNER();
-  ObjectWaiter* iterator;
   if (_WaitSet == NULL) {
     TEVENT(Empty-NotifyAll);
     return;
   }
+
   DTRACE_MONITOR_PROBE(notifyAll, this, object(), THREAD);
-
-  int Policy = Knob_MoveNotifyee;
-  int Tally = 0;
-  Thread::SpinAcquire(&_WaitSetLock, "WaitSet - notifyall");
-
-  for (;;) {
-    iterator = DequeueWaiter();
-    if (iterator == NULL) break;
-    TEVENT(NotifyAll - Transfer1);
-    ++Tally;
-
-    // Disposition - what might we do with iterator ?
-    // a.  add it directly to the EntryList - either tail or head.
-    // b.  push it onto the front of the _cxq.
-    // For now we use (a).
-
-    guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant");
-    guarantee(iterator->_notified == 0, "invariant");
-    iterator->_notified = 1;
-    Thread * Self = THREAD;
-    iterator->_notifier_tid = Self->osthread()->thread_id();
-    if (Policy != 4) {
-      iterator->TState = ObjectWaiter::TS_ENTER;
-    }
-
-    ObjectWaiter * List = _EntryList;
-    if (List != NULL) {
-      assert(List->_prev == NULL, "invariant");
-      assert(List->TState == ObjectWaiter::TS_ENTER, "invariant");
-      assert(List != iterator, "invariant");
-    }
-
-    if (Policy == 0) {       // prepend to EntryList
-      if (List == NULL) {
-        iterator->_next = iterator->_prev = NULL;
-        _EntryList = iterator;
-      } else {
-        List->_prev = iterator;
-        iterator->_next = List;
-        iterator->_prev = NULL;
-        _EntryList = iterator;
-      }
-    } else if (Policy == 1) {      // append to EntryList
-      if (List == NULL) {
-        iterator->_next = iterator->_prev = NULL;
-        _EntryList = iterator;
-      } else {
-        // CONSIDER:  finding the tail currently requires a linear-time walk of
-        // the EntryList.  We can make tail access constant-time by converting to
-        // a CDLL instead of using our current DLL.
-        ObjectWaiter * Tail;
-        for (Tail = List; Tail->_next != NULL; Tail = Tail->_next) /* empty */;
-        assert(Tail != NULL && Tail->_next == NULL, "invariant");
-        Tail->_next = iterator;
-        iterator->_prev = Tail;
-        iterator->_next = NULL;
-      }
-    } else if (Policy == 2) {      // prepend to cxq
-      // prepend to cxq
-      iterator->TState = ObjectWaiter::TS_CXQ;
-      for (;;) {
-        ObjectWaiter * Front = _cxq;
-        iterator->_next = Front;
-        if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
-          break;
-        }
-      }
-    } else if (Policy == 3) {      // append to cxq
-      iterator->TState = ObjectWaiter::TS_CXQ;
-      for (;;) {
-        ObjectWaiter * Tail;
-        Tail = _cxq;
-        if (Tail == NULL) {
-          iterator->_next = NULL;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
-            break;
-          }
-        } else {
-          while (Tail->_next != NULL) Tail = Tail->_next;
-          Tail->_next = iterator;
-          iterator->_prev = Tail;
-          iterator->_next = NULL;
-          break;
-        }
-      }
-    } else {
-      ParkEvent * ev = iterator->_event;
-      iterator->TState = ObjectWaiter::TS_RUN;
-      OrderAccess::fence();
-      ev->unpark();
-    }
-
-    if (Policy < 4) {
-      iterator->wait_reenter_begin(this);
-    }
-
-    // _WaitSetLock protects the wait queue, not the EntryList.  We could
-    // move the add-to-EntryList operation, above, outside the critical section
-    // protected by _WaitSetLock.  In practice that's not useful.  With the
-    // exception of  wait() timeouts and interrupts the monitor owner
-    // is the only thread that grabs _WaitSetLock.  There's almost no contention
-    // on _WaitSetLock so it's not profitable to reduce the length of the
-    // critical section.
+  int tally = 0;
+  while (_WaitSet != NULL) {
+    tally++;
+    INotify(THREAD);
   }
 
-  Thread::SpinRelease(&_WaitSetLock);
-
-  if (Tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
-    ObjectMonitor::_sync_Notifications->inc(Tally);
+  if (tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
+    ObjectMonitor::_sync_Notifications->inc(tally);
   }
 }
 
--- a/hotspot/src/share/vm/runtime/objectMonitor.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/objectMonitor.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -332,7 +332,7 @@
  private:
   void      AddWaiter(ObjectWaiter * waiter);
   static    void DeferredInitialize();
-
+  void      INotify(Thread * Self);
   ObjectWaiter * DequeueWaiter();
   void      DequeueSpecificWaiter(ObjectWaiter * waiter);
   void      EnterI(TRAPS);
--- a/hotspot/src/share/vm/runtime/rframe.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/rframe.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -81,7 +81,7 @@
 : InterpretedRFrame(fr, thread, callee) {}
 
 RFrame* RFrame::new_RFrame(frame fr, JavaThread* thread, RFrame*const  callee) {
-  RFrame* rf;
+  RFrame* rf = NULL;
   int dist = callee ? callee->distance() : -1;
   if (fr.is_interpreted_frame()) {
     rf = new InterpretedRFrame(fr, thread, callee);
@@ -93,8 +93,10 @@
   } else {
     assert(false, "Unhandled frame type");
   }
-  rf->set_distance(dist);
-  rf->init();
+  if (rf != NULL) {
+    rf->set_distance(dist);
+    rf->init();
+  }
   return rf;
 }
 
--- a/hotspot/src/share/vm/runtime/synchronizer.cpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/synchronizer.cpp	Thu Jul 30 15:23:25 2015 -0700
@@ -86,6 +86,8 @@
     }                                                                      \
   }
 
+#define HOTSPOT_MONITOR_PROBE_notify HOTSPOT_MONITOR_NOTIFY
+#define HOTSPOT_MONITOR_PROBE_notifyAll HOTSPOT_MONITOR_NOTIFYALL
 #define HOTSPOT_MONITOR_PROBE_waited HOTSPOT_MONITOR_WAITED
 
 #define DTRACE_MONITOR_PROBE(probe, monitor, obj, thread)                  \
@@ -146,6 +148,58 @@
 // operators: safepoints or indefinite blocking (blocking that might span a
 // safepoint) are forbidden. Generally the thread_state() is _in_Java upon
 // entry.
+//
+// Consider: An interesting optimization is to have the JIT recognize the
+// following common idiom:
+//   synchronized (someobj) { .... ; notify(); }
+// That is, we find a notify() or notifyAll() call that immediately precedes
+// the monitorexit operation.  In that case the JIT could fuse the operations
+// into a single notifyAndExit() runtime primitive.
+
+bool ObjectSynchronizer::quick_notify(oopDesc * obj, Thread * self, bool all) {
+  assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
+  assert(self->is_Java_thread(), "invariant");
+  assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant");
+  No_Safepoint_Verifier nsv;
+  if (obj == NULL) return false;  // slow-path for invalid obj
+  const markOop mark = obj->mark();
+
+  if (mark->has_locker() && self->is_lock_owned((address)mark->locker())) {
+    // Degenerate notify
+    // stack-locked by caller so by definition the implied waitset is empty.
+    return true;
+  }
+
+  if (mark->has_monitor()) {
+    ObjectMonitor * const mon = mark->monitor();
+    assert(mon->object() == obj, "invariant");
+    if (mon->owner() != self) return false;  // slow-path for IMS exception
+
+    if (mon->first_waiter() != NULL) {
+      // We have one or more waiters. Since this is an inflated monitor
+      // that we own, we can transfer one or more threads from the waitset
+      // to the entrylist here and now, avoiding the slow-path.
+      if (all) {
+        DTRACE_MONITOR_PROBE(notifyAll, mon, obj, self);
+      } else {
+        DTRACE_MONITOR_PROBE(notify, mon, obj, self);
+      }
+      int tally = 0;
+      do {
+        mon->INotify(self);
+        ++tally;
+      } while (mon->first_waiter() != NULL && all);
+      if (ObjectMonitor::_sync_Notifications != NULL) {
+        ObjectMonitor::_sync_Notifications->inc(tally);
+      }
+    }
+    return true;
+  }
+
+  // biased locking and any other IMS exception states take the slow-path
+  return false;
+}
+
 
 // The LockNode emitted directly at the synchronization site would have
 // been too big if it were to have included support for the cases of inflated
@@ -1451,8 +1505,7 @@
 // This is an unfortunate aspect of this design.
 
 enum ManifestConstants {
-  ClearResponsibleAtSTW   = 0,
-  MaximumRecheckInterval  = 1000
+  ClearResponsibleAtSTW = 0
 };
 
 // Deflate a single monitor if not in-use
--- a/hotspot/src/share/vm/runtime/synchronizer.hpp	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/src/share/vm/runtime/synchronizer.hpp	Thu Jul 30 15:23:25 2015 -0700
@@ -72,6 +72,7 @@
   static void notify(Handle obj, TRAPS);
   static void notifyall(Handle obj, TRAPS);
 
+  static bool quick_notify(oopDesc* obj, Thread* Self, bool All);
   static bool quick_enter(oop obj, Thread* Self, BasicLock* Lock);
 
   // Special internal-use-only method for use by JVM infrastructure
--- a/hotspot/test/compiler/dependencies/MonomorphicObjectCall/java/lang/Object.java	Thu Jul 30 11:15:36 2015 -0700
+++ b/hotspot/test/compiler/dependencies/MonomorphicObjectCall/java/lang/Object.java	Thu Jul 30 15:23:25 2015 -0700
@@ -58,8 +58,10 @@
         return getClass().getName() + "@" + Integer.toHexString(hashCode());
     }
 
+    @HotSpotIntrinsicCandidate
     public final native void notify();
 
+    @HotSpotIntrinsicCandidate
     public final native void notifyAll();
 
     public final native void wait(long timeout) throws InterruptedException;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/unsafe/UnsafeGetConstantField.java	Thu Jul 30 15:23:25 2015 -0700
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary tests on constant folding of unsafe get operations
+ * @library /testlibrary /../../test/lib
+ * @run main/bootclasspath -XX:+UnlockDiagnosticVMOptions
+ *                   -Xbatch -XX:-TieredCompilation
+ *                   -XX:+FoldStableValues
+ *                   -XX:+UseUnalignedAccesses
+ *                   java.lang.invoke.UnsafeGetConstantField
+ * @run main/bootclasspath -XX:+UnlockDiagnosticVMOptions
+ *                   -Xbatch -XX:-TieredCompilation
+ *                   -XX:+FoldStableValues
+ *                   -XX:-UseUnalignedAccesses
+ *                   java.lang.invoke.UnsafeGetConstantField
+ */
+package java.lang.invoke;
+
+import jdk.internal.org.objectweb.asm.*;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Utils;
+import sun.misc.Unsafe;
+import static jdk.internal.org.objectweb.asm.Opcodes.*;
+
+public class UnsafeGetConstantField {
+    static final Class<?> THIS_CLASS = UnsafeGetConstantField.class;
+
+    static final Unsafe U = Utils.getUnsafe();
+
+    public static void main(String[] args) {
+        testUnsafeGetAddress();
+        testUnsafeGetField();
+        testUnsafeGetFieldUnaligned();
+        System.out.println("TEST PASSED");
+    }
+
+    static final long nativeAddr = U.allocateMemory(16);
+    static void testUnsafeGetAddress() {
+        long cookie = 0x12345678L;
+        U.putAddress(nativeAddr, cookie);
+        for (int i = 0; i < 20_000; i++) {
+            Asserts.assertEquals(checkGetAddress(), cookie);
+        }
+    }
+    @DontInline
+    static long checkGetAddress() {
+        return U.getAddress(nativeAddr);
+    }
+
+    static void testUnsafeGetField() {
+        int[] testedFlags = new int[] { 0, ACC_STATIC, ACC_FINAL, (ACC_STATIC | ACC_FINAL) };
+        boolean[] boolValues = new boolean[] { false, true };
+        String[] modes = new String[] { "", "Volatile" };
+
+        for (JavaType t : JavaType.values()) {
+            for (int flags : testedFlags) {
+                for (boolean stable : boolValues) {
+                    for (boolean hasDefaultValue : boolValues) {
+                        for (String suffix : modes) {
+                            runTest(t, flags, stable, hasDefaultValue, suffix);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    static void testUnsafeGetFieldUnaligned() {
+        JavaType[] types = new JavaType[] { JavaType.S, JavaType.C, JavaType.I, JavaType.J };
+        int[] testedFlags = new int[] { 0, ACC_STATIC, ACC_FINAL, (ACC_STATIC | ACC_FINAL) };
+        boolean[] boolValues = new boolean[] { false, true };
+
+        for (JavaType t : types) {
+            for (int flags : testedFlags) {
+                for (boolean stable : boolValues) {
+                    for (boolean hasDefaultValue : boolValues) {
+                        runTest(t, flags, stable, hasDefaultValue, "Unaligned");
+                    }
+                }
+            }
+        }
+    }
+
+    static void runTest(JavaType t, int flags, boolean stable, boolean hasDefaultValue, String postfix) {
+        Generator g = new Generator(t, flags, stable, hasDefaultValue, postfix);
+        Test test = g.generate();
+        System.out.printf("type=%s flags=%d stable=%b default=%b post=%s\n",
+                          t.typeName, flags, stable, hasDefaultValue, postfix);
+        // Trigger compilation
+        for (int i = 0; i < 20_000; i++) {
+            Asserts.assertEQ(test.testDirect(), test.testUnsafe());
+        }
+    }
+
+    interface Test {
+        Object testDirect();
+        Object testUnsafe();
+    }
+
+    enum JavaType {
+        Z("Boolean", true),
+        B("Byte", new Byte((byte)-1)),
+        S("Short", new Short((short)-1)),
+        C("Char", Character.MAX_VALUE),
+        I("Int", -1),
+        J("Long", -1L),
+        F("Float", -1F),
+        D("Double", -1D),
+        L("Object", new Object());
+
+        String typeName;
+        Object value;
+        String wrapper;
+        JavaType(String name, Object value) {
+            this.typeName = name;
+            this.value = value;
+            this.wrapper = internalName(value.getClass());
+        }
+
+        String desc() {
+            if (this == JavaType.L) {
+                return "Ljava/lang/Object;";
+            } else {
+                return toString();
+            }
+        }
+    }
+
+    static String internalName(Class cls) {
+        return cls.getName().replace('.', '/');
+    }
+    static String descriptor(Class cls) {
+        return String.format("L%s;", internalName(cls));
+    }
+
+    /**
+     * Sample generated class:
+     * static class T1 implements Test {
+     *   final int f = -1;
+     *   static final long FIELD_OFFSET;
+     *   static final T1 t = new T1();
+     *   static {
+     *     FIELD_OFFSET = U.objectFieldOffset(T1.class.getDeclaredField("f"));
+     *   }
+     *   public Object testDirect()  { return t.f; }
+     *   public Object testUnsafe()  { return U.getInt(t, FIELD_OFFSET); }
+     * }
+     */
+    static class Generator {
+        static final String FIELD_NAME = "f";
+        static final String UNSAFE_NAME = internalName(Unsafe.class);
+        static final String UNSAFE_DESC = descriptor(Unsafe.class);
+
+        final JavaType type;
+        final int flags;
+        final boolean stable;
+        final boolean hasDefaultValue;
+        final String nameSuffix;
+
+        final String className;
+        final String classDesc;
+        final String fieldDesc;
+
+        Generator(JavaType t, int flags, boolean stable, boolean hasDefaultValue, String suffix) {
+            this.type = t;
+            this.flags = flags;
+            this.stable = stable;
+            this.hasDefaultValue = hasDefaultValue;
+            this.nameSuffix = suffix;
+
+            fieldDesc = type.desc();
+            className = String.format("%s$Test%s%s__f=%d__s=%b__d=%b", internalName(THIS_CLASS), type.typeName,
+                                      suffix, flags, stable, hasDefaultValue);
+            classDesc = String.format("L%s;", className);
+        }
+
+        byte[] generateClassFile() {
+            ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_MAXS | ClassWriter.COMPUTE_FRAMES);
+            cw.visit(Opcodes.V1_8, Opcodes.ACC_PUBLIC | Opcodes.ACC_SUPER, className, null, "java/lang/Object",
+                    new String[]{ internalName(Test.class) });
+
+            // Declare fields
+            cw.visitField(ACC_FINAL | ACC_STATIC, "t", classDesc, null, null).visitEnd();
+            cw.visitField(ACC_FINAL | ACC_STATIC, "FIELD_OFFSET", "J", null, null).visitEnd();
+            cw.visitField(ACC_FINAL | ACC_STATIC, "U", UNSAFE_DESC, null, null).visitEnd();
+            if (isStatic()) {
+                cw.visitField(ACC_FINAL | ACC_STATIC, "STATIC_BASE", "Ljava/lang/Object;", null, null).visitEnd();
+            }
+
+            FieldVisitor fv = cw.visitField(flags, FIELD_NAME, fieldDesc, null, null);
+            if (stable) {
+                fv.visitAnnotation(descriptor(Stable.class), true);
+            }
+            fv.visitEnd();
+
+            // Methods
+            {   // <init>
+                MethodVisitor mv = cw.visitMethod(0, "<init>", "()V", null, null);
+                mv.visitCode();
+
+                mv.visitVarInsn(ALOAD, 0);
+                mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false);
+                if (!isStatic()) {
+                    initField(mv);
+                }
+                mv.visitInsn(RETURN);
+
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            {   // public Object testDirect() { return t.f; }
+                MethodVisitor mv = cw.visitMethod(ACC_PUBLIC, "testDirect", "()Ljava/lang/Object;", null, null);
+                mv.visitCode();
+
+                getFieldValue(mv);
+                wrapResult(mv);
+                mv.visitInsn(ARETURN);
+
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            {   // public Object testUnsafe() { return U.getInt(t, FIELD_OFFSET); }
+                MethodVisitor mv = cw.visitMethod(ACC_PUBLIC, "testUnsafe", "()Ljava/lang/Object;", null, null);
+                mv.visitCode();
+
+                getFieldValueUnsafe(mv);
+                wrapResult(mv);
+                mv.visitInsn(ARETURN);
+
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            {   // <clinit>
+                MethodVisitor mv = cw.visitMethod(ACC_STATIC, "<clinit>", "()V", null, null);
+                mv.visitCode();
+
+                // Cache Unsafe instance
+                mv.visitMethodInsn(INVOKESTATIC, UNSAFE_NAME, "getUnsafe", "()"+UNSAFE_DESC, false);
+                mv.visitFieldInsn(PUTSTATIC, className, "U", UNSAFE_DESC);
+
+                // Create test object instance
+                mv.visitTypeInsn(NEW, className);
+                mv.visitInsn(DUP);
+                mv.visitMethodInsn(INVOKESPECIAL, className, "<init>", "()V", false);
+                mv.visitFieldInsn(PUTSTATIC, className, "t", classDesc);
+
+                // Compute field offset
+                getUnsafe(mv);
+                getField(mv);
+                mv.visitMethodInsn(INVOKEVIRTUAL, UNSAFE_NAME, (isStatic() ? "staticFieldOffset" : "objectFieldOffset"),
+                        "(Ljava/lang/reflect/Field;)J", false);
+                mv.visitFieldInsn(PUTSTATIC, className, "FIELD_OFFSET", "J");
+
+                // Compute base offset for static field
+                if (isStatic()) {
+                    getUnsafe(mv);
+                    getField(mv);
+                    mv.visitMethodInsn(INVOKEVIRTUAL, UNSAFE_NAME, "staticFieldBase", "(Ljava/lang/reflect/Field;)Ljava/lang/Object;", false);
+                    mv.visitFieldInsn(PUTSTATIC, className, "STATIC_BASE", "Ljava/lang/Object;");
+                    initField(mv);
+                }
+
+                mv.visitInsn(RETURN);
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            return cw.toByteArray();
+        }
+
+        Test generate() {
+            byte[] classFile = generateClassFile();
+            Class<?> c = U.defineClass(className, classFile, 0, classFile.length, THIS_CLASS.getClassLoader(), null);
+            try {
+                return (Test) c.newInstance();
+            } catch(Exception e) {
+                throw new Error(e);
+            }
+        }
+
+        boolean isStatic() {
+            return (flags & ACC_STATIC) > 0;
+        }
+        boolean isFinal() {
+            return (flags & ACC_FINAL) > 0;
+        }
+        void getUnsafe(MethodVisitor mv) {
+            mv.visitFieldInsn(GETSTATIC, className, "U", UNSAFE_DESC);
+        }
+        void getField(MethodVisitor mv) {
+            mv.visitLdcInsn(Type.getType(classDesc));
+            mv.visitLdcInsn(FIELD_NAME);
+            mv.visitMethodInsn(INVOKEVIRTUAL, "java/lang/Class", "getDeclaredField", "(Ljava/lang/String;)Ljava/lang/reflect/Field;", false);
+        }
+        void getFieldValue(MethodVisitor mv) {
+            if (isStatic()) {
+                mv.visitFieldInsn(GETSTATIC, className, FIELD_NAME, fieldDesc);
+            } else {
+                mv.visitFieldInsn(GETSTATIC, className, "t", classDesc);
+                mv.visitFieldInsn(GETFIELD, className, FIELD_NAME, fieldDesc);
+            }
+        }
+        void getFieldValueUnsafe(MethodVisitor mv) {
+            getUnsafe(mv);
+            if (isStatic()) {
+                mv.visitFieldInsn(GETSTATIC, className, "STATIC_BASE", "Ljava/lang/Object;");
+            } else {
+                mv.visitFieldInsn(GETSTATIC, className, "t", classDesc);
+            }
+            mv.visitFieldInsn(GETSTATIC, className, "FIELD_OFFSET", "J");
+            String name = "get" + type.typeName + nameSuffix;
+            mv.visitMethodInsn(INVOKEVIRTUAL, UNSAFE_NAME, name, "(Ljava/lang/Object;J)" + type.desc(), false);
+        }
+        void wrapResult(MethodVisitor mv) {
+            if (type != JavaType.L) {
+                String desc = String.format("(%s)L%s;", type.desc(), type.wrapper);
+                mv.visitMethodInsn(INVOKESTATIC, type.wrapper, "valueOf", desc, false);
+            }
+        }
+        void initField(MethodVisitor mv) {
+            if (hasDefaultValue) {
+                return; // Nothing to do
+            }
+            if (!isStatic()) {
+                mv.visitVarInsn(ALOAD, 0);
+            }
+            switch (type) {
+                case L: {
+                    mv.visitTypeInsn(NEW, "java/lang/Object");
+                    mv.visitInsn(DUP);
+                    mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false);
+
+                    break;
+                }
+                default: {
+                    mv.visitLdcInsn(type.value);
+                    break;
+                }
+            }
+            mv.visitFieldInsn((isStatic() ? PUTSTATIC : PUTFIELD), className, FIELD_NAME, fieldDesc);
+        }
+    }
+}