author | kvn |
Fri, 17 Jul 2015 14:51:28 -0700 | |
changeset 31865 | c3edfbc8f5ac |
parent 31795 | 8eb27ec3c443 (current diff) |
parent 31864 | 341ca6d4b290 (diff) |
child 31866 | 3c9022c78e10 |
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Fri Jul 17 14:51:28 2015 -0700 @@ -14101,7 +14101,7 @@ instruct vsub2F(vecD dst, vecD src1, vecD src2) %{ predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVF src1 src2)); + match(Set dst (SubVF src1 src2)); ins_cost(INSN_COST); format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} ins_encode %{
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -261,7 +261,7 @@ // LoadStoreExclusiveOp __ stxr(r20, r21, r2); // stxr w20, x21, [x2] - __ stlxr(r7, r29, r7); // stlxr w7, x29, [x7] + __ stlxr(r5, r29, r7); // stlxr w5, x29, [x7] __ ldxr(r5, r16); // ldxr x5, [x16] __ ldaxr(r27, r29); // ldaxr x27, [x29] __ stlr(r0, r29); // stlr x0, [x29] @@ -295,7 +295,7 @@ __ ldxp(r8, r2, r19); // ldxp x8, x2, [x19] __ ldaxp(r7, r19, r14); // ldaxp x7, x19, [x14] __ stxp(r8, r27, r28, r5); // stxp w8, x27, x28, [x5] - __ stlxp(r6, r8, r14, r6); // stlxp w6, x8, x14, [x6] + __ stlxp(r5, r8, r14, r6); // stlxp w5, x8, x14, [x6] // LoadStoreExclusiveOp __ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22] @@ -768,7 +768,7 @@ 24c: d61f0040 br x2 250: d63f00a0 blr x5 254: c8147c55 stxr w20, x21, [x2] - 258: c807fcfd stlxr w7, x29, [x7] + 258: c805fcfd stlxr w5, x29, [x7] 25c: c85f7e05 ldxr x5, [x16] 260: c85fffbb ldaxr x27, [x29] 264: c89fffa0 stlr x0, [x29] @@ -794,7 +794,7 @@ 2b4: c87f0a68 ldxp x8, x2, [x19] 2b8: c87fcdc7 ldaxp x7, x19, [x14] 2bc: c82870bb stxp w8, x27, x28, [x5] - 2c0: c826b8c8 stlxp w6, x8, x14, [x6] + 2c0: c825b8c8 stlxp w5, x8, x14, [x6] 2c4: 887f12d9 ldxp w25, w4, [x22] 2c8: 887fb9ee ldaxp w14, w14, [x15] 2cc: 8834215a stxp w20, w26, w8, [x10] @@ -1084,14 +1084,14 @@ 0xd4063721, 0xd4035082, 0xd400bfe3, 0xd4282fc0, 0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040, - 0xd63f00a0, 0xc8147c55, 0xc807fcfd, 0xc85f7e05, + 0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05, 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88187cf8, 0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73, 0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd, 0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e, 0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70, 0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb, - 0xc826b8c8, 0x887f12d9, 0x887fb9ee, 0x8834215a, + 
0xc825b8c8, 0x887f12d9, 0x887fb9ee, 0x8834215a, 0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2, 0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356, 0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3,
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -1106,11 +1106,13 @@ #define INSN4(NAME, sz, op, o0) /* Four registers */ \ void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \ + assert(Rs != Rn, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \ } #define INSN3(NAME, sz, op, o0) /* Three registers */ \ void NAME(Register Rs, Register Rt, Register Rn) { \ + assert(Rs != Rn, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \ }
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -3791,8 +3791,8 @@ br(Assembler::HI, slow_case); // If heap_top hasn't been changed by some other thread, update it. - stlxr(rscratch1, end, rscratch1); - cbnzw(rscratch1, retry); + stlxr(rscratch2, end, rscratch1); + cbnzw(rscratch2, retry); } }
--- a/hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,7 @@ address generate_empty_entry(void) { return generate_jump_to_normal_entry(); } address generate_Reference_get_entry(void); - // Not supported - address generate_CRC32_update_entry() { return NULL; } - address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } + address generate_CRC32_update_entry(); + address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + #endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -50,6 +50,7 @@ #else #define BLOCK_COMMENT(str) block_comment(str) #endif +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") #ifdef ASSERT // On RISC, there's no benefit to verifying instruction boundaries. @@ -3433,6 +3434,418 @@ bind(Ldone_false); } +// Helpers for Intrinsic Emitters +// +// Revert the byte order of a 32bit value in a register +// src: 0x44556677 +// dst: 0x77665544 +// Three steps to obtain the result: +// 1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word +// into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared. +// This value initializes dst. +// 2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost +// byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go. +// This value is mask inserted into dst with a [0..23] mask of 1s. +// 3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position. +// This value is mask inserted into dst with a [8..15] mask of 1s. +void MacroAssembler::load_reverse_32(Register dst, Register src) { + assert_different_registers(dst, src); + + rldicl(dst, src, (4+1)*8, 56); // Rotate byte 4 into position 7 (rightmost), clear all to the left. + rlwimi(dst, src, 3*8, 0, 23); // Insert byte 5 into position 6, 7 into 4, leave pos 7 alone. + rlwimi(dst, src, 1*8, 8, 15); // Insert byte 6 into position 5, leave the rest alone. +} + +// Calculate the column addresses of the crc32 lookup table into distinct registers. +// This loop-invariant calculation is moved out of the loop body, reducing the loop +// body size from 20 to 16 instructions. +// Returns the offset that was used to calculate the address of column tc3. 
+// Due to register shortage, setting tc3 may overwrite table. With the return offset +// at hand, the original table address can be easily reconstructed. +int MacroAssembler::crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3) { + +#ifdef VM_LITTLE_ENDIAN + // This is what we implement (the DOLIT4 part): + // ========================================================================= */ + // #define DOLIT4 c ^= *buf4++; \ + // c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + // crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] + // #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + // ========================================================================= */ + const int ix0 = 3*(4*CRC32_COLUMN_SIZE); + const int ix1 = 2*(4*CRC32_COLUMN_SIZE); + const int ix2 = 1*(4*CRC32_COLUMN_SIZE); + const int ix3 = 0*(4*CRC32_COLUMN_SIZE); +#else + // This is what we implement (the DOBIG4 part): + // ========================================================================= + // #define DOBIG4 c ^= *++buf4; \ + // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] + // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + // ========================================================================= + const int ix0 = 4*(4*CRC32_COLUMN_SIZE); + const int ix1 = 5*(4*CRC32_COLUMN_SIZE); + const int ix2 = 6*(4*CRC32_COLUMN_SIZE); + const int ix3 = 7*(4*CRC32_COLUMN_SIZE); +#endif + assert_different_registers(table, tc0, tc1, tc2); + assert(table == tc3, "must be!"); + + if (ix0 != 0) addi(tc0, table, ix0); + if (ix1 != 0) addi(tc1, table, ix1); + if (ix2 != 0) addi(tc2, table, ix2); + if (ix3 != 0) addi(tc3, table, ix3); + + return ix3; +} + +/** + * uint32_t crc; + * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); + */ +void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, 
Register tmp) { + assert_different_registers(crc, table, tmp); + assert_different_registers(val, table); + + if (crc == val) { // Must rotate first to use the unmodified value. + rlwinm(tmp, val, 2, 24-2, 31-2); // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. + // As we use a word (4-byte) instruction, we have to adapt the mask bit positions. + srwi(crc, crc, 8); // Unsigned shift, clear leftmost 8 bits. + } else { + srwi(crc, crc, 8); // Unsigned shift, clear leftmost 8 bits. + rlwinm(tmp, val, 2, 24-2, 31-2); // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. + } + lwzx(tmp, table, tmp); + xorr(crc, crc, tmp); +} + +/** + * uint32_t crc; + * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); + */ +void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { + fold_byte_crc32(crc, crc, table, tmp); +} + +/** + * Emits code to update CRC-32 with a byte value according to constants in table. + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. 
+ * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + BLOCK_COMMENT("update_byte_crc32:"); + xorr(val, val, crc); + fold_byte_crc32(crc, val, table, val); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register pointing to CRC table + */ +void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, + Register data, bool loopAlignment, bool invertCRC) { + assert_different_registers(crc, buf, len, table, data); + + Label L_mainLoop, L_done; + const int mainLoop_stepping = 1; + const int mainLoop_alignment = loopAlignment ? 32 : 4; // (InputForNewCode > 4 ? InputForNewCode : 32) : 4; + + // Process all bytes in a single-byte loop. + cmpdi(CCR0, len, 0); // Anything to do? + mtctr(len); + beq(CCR0, L_done); + + if (invertCRC) { + nand(crc, crc, crc); // ~c + } + + align(mainLoop_alignment); + BIND(L_mainLoop); + lbz(data, 0, buf); // Byte from buffer, zero-extended. + addi(buf, buf, mainLoop_stepping); // Advance buffer position. + update_byte_crc32(crc, data, table); + bdnz(L_mainLoop); // Iterate. + + if (invertCRC) { + nand(crc, crc, crc); // ~c + } + + bind(L_done); +} + +/** + * Emits code to update CRC-32 with a 4-byte value according to constants in table + * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c + */ +// A note on the lookup table address(es): +// The lookup table consists of two sets of four columns each. +// The columns {0..3} are used for little-endian machines. +// The columns {4..7} are used for big-endian machines. +// To save the effort of adding the column offset to the table address each time +// a table element is looked up, it is possible to pass the pre-calculated +// column addresses. 
+// Uses R9..R12 as work register. Must be saved/restored by caller, if necessary. +void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, + Register t0, Register t1, Register t2, Register t3, + Register tc0, Register tc1, Register tc2, Register tc3) { + assert_different_registers(crc, t3); + + // XOR crc with next four bytes of buffer. + lwz(t3, bufDisp, buf); + if (bufInc != 0) { + addi(buf, buf, bufInc); + } + xorr(t3, t3, crc); + + // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. + rlwinm(t0, t3, 2, 24-2, 31-2); // ((t1 >> 0) & 0xff) << 2 + rlwinm(t1, t3, 32+(2- 8), 24-2, 31-2); // ((t1 >> 8) & 0xff) << 2 + rlwinm(t2, t3, 32+(2-16), 24-2, 31-2); // ((t1 >> 16) & 0xff) << 2 + rlwinm(t3, t3, 32+(2-24), 24-2, 31-2); // ((t1 >> 24) & 0xff) << 2 + + // Use the pre-calculated column addresses. + // Load pre-calculated table values. + lwzx(t0, tc0, t0); + lwzx(t1, tc1, t1); + lwzx(t2, tc2, t2); + lwzx(t3, tc3, t3); + + // Calculate new crc from table values. + xorr(t0, t0, t1); + xorr(t2, t2, t3); + xorr(crc, t0, t2); // Now crc contains the final checksum value. +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register pointing to CRC table + * + * Uses R9..R12 as work register. Must be saved/restored by caller! 
+ */ +void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3, + Register tc0, Register tc1, Register tc2, Register tc3) { + assert_different_registers(crc, buf, len, table); + + Label L_mainLoop, L_tail; + Register tmp = t0; + Register data = t0; + Register tmp2 = t1; + const int mainLoop_stepping = 8; + const int tailLoop_stepping = 1; + const int log_stepping = exact_log2(mainLoop_stepping); + const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32; + const int complexThreshold = 2*mainLoop_stepping; + + // Don't test for len <= 0 here. This pathological case should not occur anyway. + // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. + // The situation itself is detected and handled correctly by the conditional branches + // following aghi(len, -stepping) and aghi(len, +stepping). + assert(tailLoop_stepping == 1, "check tailLoop_stepping!"); + + BLOCK_COMMENT("kernel_crc32_2word {"); + + nand(crc, crc, crc); // ~c + + // Check for short (<mainLoop_stepping) buffer. + cmpdi(CCR0, len, complexThreshold); + blt(CCR0, L_tail); + + // Pre-mainLoop alignment did show a slight (1%) positive effect on performance. + // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions. + { + // Align buf addr to mainLoop_stepping boundary. + neg(tmp2, buf); // Calculate # preLoop iterations for alignment. + rldicl(tmp2, tmp2, 0, 64-log_stepping); // Rotate tmp2 0 bits, insert into tmp2, anding with mask with 1s from 62..63. + + if (complexThreshold > mainLoop_stepping) { + sub(len, len, tmp2); // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed). + } else { + sub(tmp, len, tmp2); // Remaining bytes for main loop. 
+ cmpdi(CCR0, tmp, mainLoop_stepping); + blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing + mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed). + } + update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false); + } + + srdi(tmp2, len, log_stepping); // #iterations for mainLoop + andi(len, len, mainLoop_stepping-1); // remaining bytes for tailLoop + mtctr(tmp2); + +#ifdef VM_LITTLE_ENDIAN + Register crc_rv = crc; +#else + Register crc_rv = tmp; // Load_reverse needs separate registers to work on. + // Occupies tmp, but frees up crc. + load_reverse_32(crc_rv, crc); // Revert byte order because we are dealing with big-endian data. + tmp = crc; +#endif + + int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3); + + align(mainLoop_alignment); // Octoword-aligned loop address. Shows 2% improvement. + BIND(L_mainLoop); + update_1word_crc32(crc_rv, buf, table, 0, 0, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3); + update_1word_crc32(crc_rv, buf, table, 4, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3); + bdnz(L_mainLoop); + +#ifndef VM_LITTLE_ENDIAN + load_reverse_32(crc, crc_rv); // Revert byte order because we are dealing with big-endian data. + tmp = crc_rv; // Tmp uses its original register again. +#endif + + // Restore original table address for tailLoop. + if (reconstructTableOffset != 0) { + addi(table, table, -reconstructTableOffset); + } + + // Process last few (<complexThreshold) bytes of buffer. + BIND(L_tail); + update_byteLoop_crc32(crc, buf, len, table, data, false, false); + + nand(crc, crc, crc); // ~c + BLOCK_COMMENT("} kernel_crc32_2word"); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register pointing to CRC table + * + * uses R9..R12 as work register. Must be saved/restored by caller! 
+ */ +void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3, + Register tc0, Register tc1, Register tc2, Register tc3) { + assert_different_registers(crc, buf, len, table); + + Label L_mainLoop, L_tail; + Register tmp = t0; + Register data = t0; + Register tmp2 = t1; + const int mainLoop_stepping = 4; + const int tailLoop_stepping = 1; + const int log_stepping = exact_log2(mainLoop_stepping); + const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32; + const int complexThreshold = 2*mainLoop_stepping; + + // Don't test for len <= 0 here. This pathological case should not occur anyway. + // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. + // The situation itself is detected and handled correctly by the conditional branches + // following aghi(len, -stepping) and aghi(len, +stepping). + assert(tailLoop_stepping == 1, "check tailLoop_stepping!"); + + BLOCK_COMMENT("kernel_crc32_1word {"); + + nand(crc, crc, crc); // ~c + + // Check for short (<mainLoop_stepping) buffer. + cmpdi(CCR0, len, complexThreshold); + blt(CCR0, L_tail); + + // Pre-mainLoop alignment did show a slight (1%) positive effect on performance. + // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions. + { + // Align buf addr to mainLoop_stepping boundary. + neg(tmp2, buf); // Calculate # preLoop iterations for alignment. + rldicl(tmp2, tmp2, 0, 64-log_stepping); // Rotate tmp2 0 bits, insert into tmp2, anding with mask with 1s from 62..63. + + if (complexThreshold > mainLoop_stepping) { + sub(len, len, tmp2); // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed). + } else { + sub(tmp, len, tmp2); // Remaining bytes for main loop. 
+ cmpdi(CCR0, tmp, mainLoop_stepping); + blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing + mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed). + } + update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false); + } + + srdi(tmp2, len, log_stepping); // #iterations for mainLoop + andi(len, len, mainLoop_stepping-1); // remaining bytes for tailLoop + mtctr(tmp2); + +#ifdef VM_LITTLE_ENDIAN + Register crc_rv = crc; +#else + Register crc_rv = tmp; // Load_reverse needs separate registers to work on. + // Occupies tmp, but frees up crc. + load_reverse_32(crc_rv, crc); // Revert byte order because we are dealing with big-endian data. + tmp = crc; +#endif + + int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3); + + align(mainLoop_alignment); // Octoword-aligned loop address. Shows 2% improvement. + BIND(L_mainLoop); + update_1word_crc32(crc_rv, buf, table, 0, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3); + bdnz(L_mainLoop); + +#ifndef VM_LITTLE_ENDIAN + load_reverse_32(crc, crc_rv); // Revert byte order because we are dealing with big-endian data. + tmp = crc_rv; // Tmp uses its original register again. +#endif + + // Restore original table address for tailLoop. + if (reconstructTableOffset != 0) { + addi(table, table, -reconstructTableOffset); + } + + // Process last few (<complexThreshold) bytes of buffer. + BIND(L_tail); + update_byteLoop_crc32(crc, buf, len, table, data, false, false); + + nand(crc, crc, crc); // ~c + BLOCK_COMMENT("} kernel_crc32_1word"); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register pointing to CRC table + * + * Uses R7_ARG5, R8_ARG6 as work registers. 
+ */ +void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3) { + assert_different_registers(crc, buf, len, table); + + Register data = t0; // Holds the current byte to be folded into crc. + + BLOCK_COMMENT("kernel_crc32_1byte {"); + + // Process all bytes in a single-byte loop. + update_byteLoop_crc32(crc, buf, len, table, data, true, true); + + BLOCK_COMMENT("} kernel_crc32_1byte"); +} + +void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) { + assert_different_registers(crc, buf, /* len, not used!! */ table, tmp); + + BLOCK_COMMENT("kernel_crc32_singleByte:"); + nand(crc, crc, crc); // ~c + + lbz(tmp, 0, buf); // Byte from buffer, zero-extended. + update_byte_crc32(crc, tmp, table); + + nand(crc, crc, crc); // ~c +} + // dest_lo += src1 + src2 // dest_hi += carry1 + carry2 void MacroAssembler::add2_with_carry(Register dest_hi, @@ -3515,7 +3928,7 @@ b(L_multiply); - bind( L_one_x ); // Load one 32 bit portion of x as (0,value). + bind(L_one_x); // Load one 32 bit portion of x as (0,value). lwz(x_xstart, 0, x); b(L_first_loop); @@ -3534,7 +3947,7 @@ // z[kdx] = (jlong)product; sldi(tmp, idx, LogBytesPerInt); - if ( offset ) { + if (offset) { addi(tmp, tmp, offset); } ldx(yz_idx, y, tmp); @@ -3551,7 +3964,7 @@ add2_with_carry(product_high, product, carry, yz_idx); sldi(tmp, idx, LogBytesPerInt); - if ( offset ) { + if (offset) { addi(tmp, tmp, offset); } #ifdef VM_LITTLE_ENDIAN
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -702,6 +702,27 @@ Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10, Register tmp11, Register tmp12, Register tmp13); + // CRC32 Intrinsics. + void load_reverse_32(Register dst, Register src); + int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3); + void fold_byte_crc32(Register crc, Register val, Register table, Register tmp); + void fold_8bit_crc32(Register crc, Register table, Register tmp); + void update_byte_crc32(Register crc, Register val, Register table); + void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, + Register data, bool loopAlignment, bool invertCRC); + void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, + Register t0, Register t1, Register t2, Register t3, + Register tc0, Register tc1, Register tc2, Register tc3); + void kernel_crc32_2word(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3, + Register tc0, Register tc1, Register tc2, Register tc3); + void kernel_crc32_1word(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3, + Register tc0, Register tc1, Register tc2, Register tc3); + void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3); + void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp); + // // Debugging //
--- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -2126,6 +2126,54 @@ return start; } + /** + * Arguments: + * + * Inputs: + * R3_ARG1 - int crc + * R4_ARG2 - byte* buf + * R5_ARG3 - int length (of buffer) + * + * scratch: + * R6_ARG4 - crc table address + * R7_ARG5 - tmp1 + * R8_ARG6 - tmp2 + * + * Output: + * R3_RET - int crc result + */ + // Compute CRC32 function. + address generate_CRC32_updateBytes(const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ function_entry(); // Remember stub start address (is rtn value). + + // arguments to kernel_crc32: + Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call. + Register data = R4_ARG2; // source byte array + Register dataLen = R5_ARG3; // #bytes to process + Register table = R6_ARG4; // crc table address + + Register t0 = R9; // work reg for kernel* emitters + Register t1 = R10; // work reg for kernel* emitters + Register t2 = R11; // work reg for kernel* emitters + Register t3 = R12; // work reg for kernel* emitters + + BLOCK_COMMENT("Stub body {"); + assert_different_registers(crc, data, dataLen, table); + + StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); + + __ kernel_crc32_1byte(crc, data, dataLen, table, t0, t1, t2, t3); + + BLOCK_COMMENT("return"); + __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET). + __ blr(); + + BLOCK_COMMENT("} Stub body"); + return start; + } + // Initialization void generate_initial() { // Generates all stubs and initializes the entry points @@ -2144,6 +2192,12 @@ StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + + // CRC32 Intrinsics. 
+ if (UseCRC32Intrinsics) { + StubRoutines::_crc_table_adr = (address)StubRoutines::ppc64::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes"); + } } void generate_all() {
--- a/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,7 +23,457 @@ * */ +#include "asm/macroAssembler.inline.hpp" +#include "runtime/stubRoutines.hpp" + // Implementation of the platform-specific part of StubRoutines - for // a description of how to extend it, see the stubRoutines.hpp file. +#define __ masm-> + +// CRC32 Intrinsics. +void StubRoutines::ppc64::generate_load_crc_table_addr(MacroAssembler* masm, Register table) { + __ load_const(table, StubRoutines::_crc_table_adr); +} + +// CRC32 Intrinsics. 
+/** + * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.h + */ +juint StubRoutines::ppc64::_crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE] = { + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 
0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef CRC32_BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 
0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 
0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 
0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 
0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 
0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 
0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 
0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 
0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 
0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 
0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 
0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 
0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 
0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +};
--- a/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,11 +30,35 @@ // definition. See stubRoutines.hpp for a description on how to // extend it. -static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } +static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } enum platform_dependent_constants { code_size1 = 20000, // simply increase if too small (assembler will crash if too small) code_size2 = 20000 // simply increase if too small (assembler will crash if too small) }; +// CRC32 Intrinsics. +#define CRC32_COLUMN_SIZE 256 +#define CRC32_BYFOUR +#ifdef CRC32_BYFOUR + #define CRC32_TABLES 8 +#else + #define CRC32_TABLES 1 +#endif + +class ppc64 { + friend class StubGenerator; + + private: + + // CRC32 Intrinsics. + static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE]; + + public: + + // CRC32 Intrinsics. + static void generate_load_crc_table_addr(MacroAssembler* masm, Register table); + +}; + #endif // CPU_PPC_VM_STUBROUTINES_PPC_64_HPP
--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -58,7 +58,7 @@ #define BLOCK_COMMENT(str) __ block_comment(str) #endif -#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") //----------------------------------------------------------------------------- @@ -725,7 +725,7 @@ } generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - __ BIND(continue_after_compile); + BIND(continue_after_compile); // Reset the _do_not_unlock_if_synchronized flag. if (synchronized) { __ li(R0, 0); @@ -785,7 +785,7 @@ __ ld(signature_handler_fd, method_(signature_handler)); __ twi_0(signature_handler_fd); // Order wrt. load of klass mirror and entry point (isync is below). - __ BIND(call_signature_handler); + BIND(call_signature_handler); // Before we call the signature handler we push a new frame to // protect the interpreter frame volatile registers when we return @@ -855,7 +855,7 @@ __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); // R4_ARG2 = &state->_oop_temp; __ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp)); - __ BIND(method_is_not_static); + BIND(method_is_not_static); } // At this point, arguments have been copied off the stack into @@ -1068,14 +1068,14 @@ // interpreter will do the correct thing. If it isn't interpreted // (call stub/compiled code) we will change our return and continue. - __ BIND(exception_return_sync_check); + BIND(exception_return_sync_check); if (synchronized) { // Don't check for exceptions since we're still in the i2n frame. Do that // manually afterwards. unlock_method(false); } - __ BIND(exception_return_sync_check_already_unlocked); + BIND(exception_return_sync_check_already_unlocked); const Register return_pc = R31; @@ -1240,6 +1240,179 @@ return entry; } +// CRC32 Intrinsics. +// +// Contract on scratch and work registers. 
+// ======================================= +// +// On ppc, the register set {R2..R12} is available in the interpreter as scratch/work registers. +// You should, however, keep in mind that {R3_ARG1..R10_ARG8} is the C-ABI argument register set. +// You can't rely on these registers across calls. +// +// The generators for CRC32_update and for CRC32_updateBytes use the +// scratch/work register set internally, passing the work registers +// as arguments to the MacroAssembler emitters as required. +// +// R3_ARG1..R6_ARG4 are preset to hold the incoming java arguments. +// Their contents are not constant but may change according to the requirements +// of the emitted code. +// +// All other registers from the scratch/work register set are used "internally" +// and contain garbage (i.e. unpredictable values) once blr() is reached. +// Basically, only R3_RET contains a defined value which is the function result. +// +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + address start = __ pc(); // Remember stub start address (is rtn value). + + if (UseCRC32Intrinsics) { + Label slow_path; + + // Safepoint check + const Register sync_state = R11_scratch1; + int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true); + __ lwz(sync_state, sync_state_offs, sync_state); + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + __ bne(CCR0, slow_path); + + // We don't generate a local frame and don't align the stack because + // we do not even call stub code (we generate the code inline) + // and there is no safepoint on this path. + + // Load java parameters. + // R15_esp is the caller's operand stack pointer, i.e. it points to the parameters.
+ const Register argP = R15_esp; + const Register crc = R3_ARG1; // crc value + const Register data = R4_ARG2; // address of java byte value (kernel_crc32 needs address) + const Register dataLen = R5_ARG3; // source data len (1 byte). Not used because calling the single-byte emitter. + const Register table = R6_ARG4; // address of crc32 table + const Register tmp = dataLen; // Reuse unused len register to show we don't actually need a separate tmp here. + + BLOCK_COMMENT("CRC32_update {"); + + // Arguments are reversed on java expression stack +#ifdef VM_LITTLE_ENDIAN + __ addi(data, argP, 0+1*wordSize); // (stack) address of byte value. Emitter expects address, not value. + // Being passed as an int, the single byte is at offset +0. +#else + __ addi(data, argP, 3+1*wordSize); // (stack) address of byte value. Emitter expects address, not value. + // Being passed from java as an int, the single byte is at offset +3. +#endif + __ lwz(crc, 2*wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register. + + StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); + __ kernel_crc32_singleByte(crc, data, dataLen, table, tmp); + + // Restore caller sp for c2i case and return. + __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. + __ blr(); + + // Generate a vanilla native entry as the slow path. + BLOCK_COMMENT("} CRC32_update"); + BIND(slow_path); + } + + (void) generate_native_entry(false); + + return start; +} + +// CRC32 Intrinsics. +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes( int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + address start = __ pc(); // Remember stub start address (is rtn value). 
+ + if (UseCRC32Intrinsics) { + Label slow_path; + + // Safepoint check + const Register sync_state = R11_scratch1; + int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true); + __ lwz(sync_state, sync_state_offs, sync_state); + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + __ bne(CCR0, slow_path); + + // We don't generate a local frame and don't align the stack because + // we do not even call stub code (we generate the code inline) + // and there is no safepoint on this path. + + // Load parameters. + // R15_esp is the caller's operand stack pointer, i.e. it points to the parameters. + const Register argP = R15_esp; + const Register crc = R3_ARG1; // crc value + const Register data = R4_ARG2; // address of java byte array + const Register dataLen = R5_ARG3; // source data len + const Register table = R6_ARG4; // address of crc32 table + + const Register t0 = R9; // scratch registers for crc calculation + const Register t1 = R10; + const Register t2 = R11; + const Register t3 = R12; + + const Register tc0 = R2; // registers to hold pre-calculated column addresses + const Register tc1 = R7; + const Register tc2 = R8; + const Register tc3 = table; // table address is reconstructed at the end of kernel_crc32_* emitters + + const Register tmp = t0; // Only used very locally to calculate byte buffer address. + + // Arguments are reversed on java expression stack. + // Calculate address of start element. + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct".
+ BLOCK_COMMENT("CRC32_updateByteBuffer {"); + // crc @ (SP + 5W) (32bit) + // buf @ (SP + 3W) (64bit ptr to long array) + // off @ (SP + 2W) (32bit) + // dataLen @ (SP + 1W) (32bit) + // data = buf + off + __ ld( data, 3*wordSize, argP); // start of byte buffer + __ lwa( tmp, 2*wordSize, argP); // byte buffer offset + __ lwa( dataLen, 1*wordSize, argP); // #bytes to process + __ lwz( crc, 5*wordSize, argP); // current crc state + __ add( data, data, tmp); // Add byte buffer offset. + } else { // Used for "updateBytes update". + BLOCK_COMMENT("CRC32_updateBytes {"); + // crc @ (SP + 4W) (32bit) + // buf @ (SP + 3W) (64bit ptr to byte array) + // off @ (SP + 2W) (32bit) + // dataLen @ (SP + 1W) (32bit) + // data = buf + off + base_offset + __ ld( data, 3*wordSize, argP); // start of byte buffer + __ lwa( tmp, 2*wordSize, argP); // byte buffer offset + __ lwa( dataLen, 1*wordSize, argP); // #bytes to process + __ add( data, data, tmp); // add byte buffer offset + __ lwz( crc, 4*wordSize, argP); // current crc state + __ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } + + StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); + + // Performance measurements show the 1word and 2word variants to be almost equivalent, + // with very light advantages for the 1word variant. We chose the 1word variant for + // code compactness. + __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3); + + // Restore caller sp for c2i case and return. + __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. + __ blr(); + + // Generate a vanilla native entry as the slow path. + BLOCK_COMMENT("} CRC32_updateBytes(Buffer)"); + BIND(slow_path); + } + + (void) generate_native_entry(false); + + return start; +} + // These should never be compiled since the interpreter will prefer // the compiled version to the intrinsic version. bool AbstractInterpreter::can_be_compiled(methodHandle m) {
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -159,10 +159,18 @@ assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); - if (UseCRC32Intrinsics) { - if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) - warning("CRC32 intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + // Implementation does not use any of the vector instructions + // available with Power8. Their exploitation is still pending. + if (!UseCRC32Intrinsics) { + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); + } + } + + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); } // The AES intrinsic stubs require AES instruction support. @@ -192,12 +200,6 @@ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } - if (UseCRC32CIntrinsics) { - if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) - warning("CRC32C intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); - } - if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true; }
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -285,7 +285,7 @@ CPU_AVX512BW = (1 << 31) } cpuFeatureFlags; -#define CPU_AVX512VL 0x100000000 // EVEX instructions with smaller vector length : enums are limited to 32bit +#define CPU_AVX512VL UCONST64(0x100000000) // EVEX instructions with smaller vector length : enums are limited to 32bit enum { // AMD
--- a/hotspot/src/share/vm/ci/ciField.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/ci/ciField.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -181,6 +181,17 @@ return (holder()->is_subclass_of(callsite_klass) && (name() == ciSymbol::target_name())); } + bool is_autobox_cache() { + ciSymbol* klass_name = holder()->name(); + return (name() == ciSymbol::cache_field_name() && + holder()->uses_default_loader() && + (klass_name == ciSymbol::java_lang_Character_CharacterCache() || + klass_name == ciSymbol::java_lang_Byte_ByteCache() || + klass_name == ciSymbol::java_lang_Short_ShortCache() || + klass_name == ciSymbol::java_lang_Integer_IntegerCache() || + klass_name == ciSymbol::java_lang_Long_LongCache())); + } + // Debugging output void print(); void print_name_on(outputStream* st);
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/classfile/classFileParser.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -4171,10 +4171,13 @@ } } +#ifdef ASSERT if (CheckIntrinsics) { // Check for orphan methods in the current class. A method m // of a class C is orphan if an intrinsic is defined for method m, // but class C does not declare m. + // The check is potentially expensive, therefore it is available + // only in debug builds. for (int id = vmIntrinsics::FIRST_ID; id < (int)vmIntrinsics::ID_LIMIT; id++) { if (id == vmIntrinsics::_compiledLambdaForm) { @@ -4210,8 +4213,10 @@ } } } +#endif // ASSERT } + if (cached_class_file != NULL) { // JVMTI: we have an InstanceKlass now, tell it about the cached bytes this_klass->set_cached_class_file(cached_class_file);
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -658,7 +658,10 @@ // annotation. If CheckIntrinsics is enabled, the VM performs the following // checks when a class C is loaded: (1) all intrinsics defined by the VM for // class C are present in the loaded class file and are marked; -// (2) an intrinsic is defined by the VM for all marked methods of class C. +// (2) an intrinsic is defined by the VM for all marked methods of class C; +// (3) check for orphan methods in class C (i.e., methods for which the VM +// declares an intrinsic but that are not declared for the loaded class C. +// Check (3) is available only in debug builds. // // If a mismatch is detected for a method, the VM behaves differently depending // on the type of build. A fastdebug build exits and reports an error on a mismatch.
--- a/hotspot/src/share/vm/opto/compile.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/compile.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -140,6 +140,9 @@ bool is_debug() const { return _debug; } void set_debug(bool debug) { _debug = debug; } static const char* debug_option_name; + + bool same_idx(node_idx_t k1, node_idx_t k2) const { return idx(k1) == idx(k2); } + bool same_gen(node_idx_t k1, node_idx_t k2) const { return gen(k1) == gen(k2); } }; //------------------------------Compile----------------------------------------
--- a/hotspot/src/share/vm/opto/library_call.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/library_call.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -2687,35 +2687,48 @@ // of safe & unsafe memory. if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder); - if (!is_store) { - MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered; - // To be valid, unsafe loads may depend on other conditions than - // the one that guards them: pin the Load node - Node* p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile); - // load value - switch (type) { - case T_BOOLEAN: - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - case T_LONG: - case T_FLOAT: - case T_DOUBLE: - break; - case T_OBJECT: - if (need_read_barrier) { - insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar)); + if (!is_store) { + Node* p = NULL; + // Try to constant fold a load from a constant field + ciField* field = alias_type->field(); + if (heap_base_oop != top() && + field != NULL && field->is_constant() && field->layout_type() == type) { + // final or stable field + const Type* con_type = Type::make_constant(alias_type->field(), heap_base_oop); + if (con_type != NULL) { + p = makecon(con_type); } - break; - case T_ADDRESS: - // Cast to an int type. - p = _gvn.transform(new CastP2XNode(NULL, p)); - p = ConvX2UL(p); - break; - default: - fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); - break; + } + if (p == NULL) { + MemNode::MemOrd mo = is_volatile ? 
MemNode::acquire : MemNode::unordered; + // To be valid, unsafe loads may depend on other conditions than + // the one that guards them: pin the Load node + p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile); + // load value + switch (type) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + case T_LONG: + case T_FLOAT: + case T_DOUBLE: + break; + case T_OBJECT: + if (need_read_barrier) { + insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar)); + } + break; + case T_ADDRESS: + // Cast to an int type. + p = _gvn.transform(new CastP2XNode(NULL, p)); + p = ConvX2UL(p); + break; + default: + fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); + break; + } } // The load node has the control of the preceding MemBarCPUOrder. All // following nodes will have the control of the MemBarCPUOrder inserted at
--- a/hotspot/src/share/vm/opto/loopnode.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/loopnode.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -3682,7 +3682,6 @@ } void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { - CloneMap& cm = C->clone_map(); loop->dump_head(); // Now scan for CFG nodes in the same loop @@ -3714,7 +3713,6 @@ cached_idom = find_non_split_ctrl(cached_idom); } tty->print(" ID:%d",computed_idom->_idx); - cm.dump(n->_idx); n->dump(); if( cached_idom != computed_idom ) { tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d", @@ -3734,7 +3732,6 @@ for( uint j = 0; j < loop->_nest; j++ ) tty->print(" "); tty->print(" "); - cm.dump(m->_idx); m->dump(); } }
--- a/hotspot/src/share/vm/opto/parse.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/parse.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -539,10 +539,6 @@ void do_get_xxx(Node* obj, ciField* field, bool is_field); void do_put_xxx(Node* obj, ciField* field, bool is_field); - // loading from a constant field or the constant pool - // returns false if push failed (non-perm field constants only, not ldcs) - bool push_constant(ciConstant con, bool require_constant = false, bool is_autobox_cache = false, const Type* basic_type = NULL); - // implementation of object creation bytecodes void emit_guard_for_new(ciInstanceKlass* klass); void do_new();
--- a/hotspot/src/share/vm/opto/parse2.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/parse2.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -1478,8 +1478,10 @@ } assert(constant.basic_type() != T_OBJECT || constant.as_object()->is_instance(), "must be java_mirror of klass"); - bool pushed = push_constant(constant, true); - guarantee(pushed, "must be possible to push this constant"); + const Type* con_type = Type::make_from_constant(constant); + if (con_type != NULL) { + push_node(con_type->basic_type(), makecon(con_type)); + } } break;
--- a/hotspot/src/share/vm/opto/parse3.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/parse3.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -149,51 +149,10 @@ // Does this field have a constant value? If so, just push the value. if (field->is_constant()) { // final or stable field - const Type* stable_type = NULL; - if (FoldStableValues && field->is_stable()) { - stable_type = Type::get_const_type(field->type()); - if (field->type()->is_array_klass()) { - int stable_dimension = field->type()->as_array_klass()->dimension(); - stable_type = stable_type->is_aryptr()->cast_to_stable(true, stable_dimension); - } - } - if (field->is_static()) { - // final static field - if (C->eliminate_boxing()) { - // The pointers in the autobox arrays are always non-null. - ciSymbol* klass_name = field->holder()->name(); - if (field->name() == ciSymbol::cache_field_name() && - field->holder()->uses_default_loader() && - (klass_name == ciSymbol::java_lang_Character_CharacterCache() || - klass_name == ciSymbol::java_lang_Byte_ByteCache() || - klass_name == ciSymbol::java_lang_Short_ShortCache() || - klass_name == ciSymbol::java_lang_Integer_IntegerCache() || - klass_name == ciSymbol::java_lang_Long_LongCache())) { - bool require_const = true; - bool autobox_cache = true; - if (push_constant(field->constant_value(), require_const, autobox_cache)) { - return; - } - } - } - if (push_constant(field->constant_value(), false, false, stable_type)) - return; - } else { - // final or stable non-static field - // Treat final non-static fields of trusted classes (classes in - // java.lang.invoke and sun.invoke packages and subpackages) as - // compile time constants. 
- if (obj->is_Con()) { - const TypeOopPtr* oop_ptr = obj->bottom_type()->isa_oopptr(); - ciObject* constant_oop = oop_ptr->const_oop(); - ciConstant constant = field->constant_value_of(constant_oop); - if (FoldStableValues && field->is_stable() && constant.is_null_or_zero()) { - // fall through to field load; the field is not yet initialized - } else { - if (push_constant(constant, true, false, stable_type)) - return; - } - } + const Type* con_type = Type::make_constant(field, obj); + if (con_type != NULL) { + push_node(con_type->basic_type(), makecon(con_type)); + return; } } @@ -362,39 +321,6 @@ } } - - -bool Parse::push_constant(ciConstant constant, bool require_constant, bool is_autobox_cache, const Type* stable_type) { - const Type* con_type = Type::make_from_constant(constant, require_constant, is_autobox_cache); - switch (constant.basic_type()) { - case T_ARRAY: - case T_OBJECT: - // cases: - // can_be_constant = (oop not scavengable || ScavengeRootsInCode != 0) - // should_be_constant = (oop not scavengable || ScavengeRootsInCode >= 2) - // An oop is not scavengable if it is in the perm gen. - if (stable_type != NULL && con_type != NULL && con_type->isa_oopptr()) - con_type = con_type->join_speculative(stable_type); - break; - - case T_ILLEGAL: - // Invalid ciConstant returned due to OutOfMemoryError in the CI - assert(C->env()->failing(), "otherwise should not see this"); - // These always occur because of object types; we are going to - // bail out anyway, so make the stack depths match up - push( zerocon(T_OBJECT) ); - return false; - } - - if (con_type == NULL) - // we cannot inline the oop, but we can use it later to narrow a type - return false; - - push_node(constant.basic_type(), makecon(con_type)); - return true; -} - - //============================================================================= void Parse::do_anewarray() { bool will_link;
--- a/hotspot/src/share/vm/opto/superword.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/superword.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -74,9 +74,15 @@ _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style _ii_first(-1), // first loop generation index - only if do_vector_loop() _ii_last(-1), // last loop generation index - only if do_vector_loop() - _ii_order(arena(), 8, 0, 0), - _vector_loop_debug(phase->C->has_method() && phase->C->method_has_option("VectorizeDebug")) -{} + _ii_order(arena(), 8, 0, 0) +{ +#ifndef PRODUCT + _vector_loop_debug = 0; + if (_phase->C->method() != NULL) { + _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug); + } +#endif +} //------------------------------transform_loop--------------------------- void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { @@ -90,7 +96,6 @@ if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops - // Check for no control flow in body (other than exit) Node *cl_exit = cl->loopexit(); if (cl_exit->in(0) != lpt->_head) return; @@ -425,13 +430,15 @@ // this reference to a vector-aligned address. best_align_to_mem_ref = mem_ref; best_iv_adjustment = iv_adjustment; + NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);) } SWPointer align_to_ref_p(mem_ref, this, NULL, false); // Set alignment relative to "align_to_ref" for all related memory operations. 
for (int i = memops.size() - 1; i >= 0; i--) { MemNode* s = memops.at(i)->as_Mem(); - if (isomorphic(s, mem_ref)) { + if (isomorphic(s, mem_ref) && + (!_do_vector_loop || same_origin_idx(s, mem_ref))) { SWPointer p2(s, this, NULL, false); if (p2.comparable(align_to_ref_p)) { int align = memory_alignment(s, iv_adjustment); @@ -496,7 +503,7 @@ Node_List* pair = new Node_List(); pair->push(s1); pair->push(s2); - if (!_do_vector_loop || _clone_map.idx(s1->_idx) == _clone_map.idx(s2->_idx)) { + if (!_do_vector_loop || same_origin_idx(s1, s2)) { _packset.append(pair); } } @@ -533,8 +540,12 @@ memops.push(s); } MemNode* best_align_to_mem_ref = find_align_to_ref(memops); - if (best_align_to_mem_ref == NULL) break; + if (best_align_to_mem_ref == NULL) { + NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");) + break; + } best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); + NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);) // Restore list. while (memops.size() > orig_msize) (void)memops.pop(); @@ -560,6 +571,16 @@ #endif } +#ifndef PRODUCT +void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) { + if (is_trace_adjacent()) { + tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d", + best_align_to_mem_ref->_idx, best_iv_adjustment); + best_align_to_mem_ref->dump(); + } +} +#endif + //------------------------------find_align_to_ref--------------------------- // Find a memory reference to align the loop induction variable to. 
// Looks first at stores then at loads, looking for a memory reference @@ -756,9 +777,11 @@ } #ifndef PRODUCT - if (TraceSuperWord) - tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d", - offset, iv_adjustment, elt_size, scale, iv_stride(), vw); + if (TraceSuperWord) { + tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ", + mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw); + mem_ref->dump(); + } #endif return iv_adjustment; } @@ -863,12 +886,14 @@ Node* n = start; Node* prev = NULL; while (true) { + NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);) assert(in_bb(n), "must be in block"); for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* out = n->fast_out(i); if (out->is_Load()) { if (in_bb(out)) { preds.push(out); + NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);) } } else { // FIXME @@ -883,10 +908,11 @@ } else { assert(out == prev || prev == NULL, "no branches off of store slice"); } - } - } + }//else + }//for if (n == stop) break; preds.push(n); + NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);) prev = n; assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name())); n = n->in(MemNode::Memory); @@ -2140,18 +2166,38 @@ } else { _stk.pop(); // Remove post-visited node from stack } - } - + }//while + + int ii_current = -1; + unsigned int load_idx = (unsigned int)-1; + _ii_order.clear(); // Create real map of block indices for nodes for (int j = 0; j < _block.length(); j++) { Node* n = _block.at(j); set_bb_idx(n, j); - } + if (_do_vector_loop && n->is_Load()) { + if (ii_current == -1) { + ii_current = _clone_map.gen(n->_idx); + _ii_order.push(ii_current); + load_idx = _clone_map.idx(n->_idx); + } else if 
(_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) { + ii_current = _clone_map.gen(n->_idx); + _ii_order.push(ii_current); + } + } + }//for // Ensure extra info is allocated. initialize_bb(); #ifndef PRODUCT + if (_vector_loop_debug && _ii_order.length() > 0) { + tty->print("SuperWord::construct_bb: List of generations: "); + for (int jj = 0; jj < _ii_order.length(); ++jj) { + tty->print(" %d:%d", jj, _ii_order.at(jj)); + } + tty->print_cr(" "); + } if (TraceSuperWord) { print_bb(); tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); @@ -2312,18 +2358,27 @@ //------------------------------memory_alignment--------------------------- // Alignment within a vector memory reference int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { + #ifndef PRODUCT + if(TraceSuperWord && Verbose) { + tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump(); + } + #endif + NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);) SWPointer p(s, this, NULL, false); if (!p.valid()) { + NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");) return bottom_align; } int vw = vector_width_in_bytes(s); if (vw < 2) { + NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");) return bottom_align; // No vectors for this type } int offset = p.offset_in_bytes(); offset += iv_adjust*p.memory_size(); int off_rem = offset % vw; int off_mod = off_rem >= 0 ? 
off_rem : off_rem + vw; + NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);) return off_mod; } @@ -2732,13 +2787,20 @@ //==============================SWPointer=========================== - +#ifndef PRODUCT +int SWPointer::Tracer::_depth = 0; +#endif //----------------------------SWPointer------------------------ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) : _mem(mem), _slp(slp), _base(NULL), _adr(NULL), _scale(0), _offset(0), _invar(NULL), _negate_invar(false), _nstack(nstack), _analyze_only(analyze_only), - _stack_idx(0) { + _stack_idx(0) +#ifndef PRODUCT + , _tracer(slp) +#endif +{ + NOT_PRODUCT(_tracer.ctor_1(mem);) Node* adr = mem->in(MemNode::Address); if (!adr->is_AddP()) { @@ -2757,16 +2819,29 @@ assert(!valid(), "unsafe access"); return; } - for (int i = 0; i < 3; i++) { + + NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();) + NOT_PRODUCT(_tracer.ctor_2(adr);) + + int i; + for (i = 0; i < 3; i++) { + NOT_PRODUCT(_tracer.ctor_3(adr, i);) + if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { assert(!valid(), "too complex"); return; } adr = adr->in(AddPNode::Address); + NOT_PRODUCT(_tracer.ctor_4(adr, i);) + if (base == adr || !adr->is_AddP()) { + NOT_PRODUCT(_tracer.ctor_5(adr, base, i);) break; // stop looking at addp's } } + NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();) + NOT_PRODUCT(_tracer.ctor_6(mem);) + _base = base; _adr = adr; assert(valid(), "Usable"); @@ -2778,68 +2853,103 @@ _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), _scale(0), _offset(0), _invar(NULL), _negate_invar(false), _nstack(p->_nstack), _analyze_only(p->_analyze_only), - _stack_idx(p->_stack_idx) {} - + _stack_idx(p->_stack_idx) + #ifndef PRODUCT + , _tracer(p->_slp) + #endif +{} + + +bool SWPointer::invariant(Node* n) { + NOT_PRODUCT(Tracer::Depth dd;) + Node *n_c = phase()->get_ctrl(n); + 
NOT_PRODUCT(_tracer.invariant_1(n, n_c);) + return !lpt()->is_member(phase()->get_loop(n_c)); +} //------------------------scaled_iv_plus_offset-------------------- // Match: k*iv + offset // where: k is a constant that maybe zero, and // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional bool SWPointer::scaled_iv_plus_offset(Node* n) { + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);) + if (scaled_iv(n)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);) return true; } + if (offset_plus_k(n)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);) return true; } + int opc = n->Opcode(); if (opc == Op_AddI) { if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);) return true; } if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);) return true; } } else if (opc == Op_SubI) { if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);) return true; } if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { _scale *= -1; + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);) return true; } } + + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);) return false; } //----------------------------scaled_iv------------------------ // Match: k*iv where k is a constant that's not zero bool SWPointer::scaled_iv(Node* n) { - if (_scale != 0) { - return false; // already found a scale + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.scaled_iv_1(n);) + + if (_scale != 0) { // already found a scale + NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);) + return false; } + if (n == iv()) { _scale = 1; + NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);) return true; } if (_analyze_only && (invariant(n) == false)) { _nstack->push(n, _stack_idx++); } + int opc = n->Opcode(); if (opc == Op_MulI) { if (n->in(1) == iv() && n->in(2)->is_Con()) { _scale = n->in(2)->get_int(); + 
NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);) return true; } else if (n->in(2) == iv() && n->in(1)->is_Con()) { _scale = n->in(1)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);) return true; } } else if (opc == Op_LShiftI) { if (n->in(1) == iv() && n->in(2)->is_Con()) { _scale = 1 << n->in(2)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);) return true; } } else if (opc == Op_ConvI2L) { if (scaled_iv_plus_offset(n->in(1))) { + NOT_PRODUCT(_tracer.scaled_iv_7(n);) return true; } } else if (opc == Op_LShiftL) { @@ -2847,17 +2957,22 @@ // Need to preserve the current _offset value, so // create a temporary object for this expression subtree. // Hacky, so should re-engineer the address pattern match. + NOT_PRODUCT(Tracer::Depth dddd;) SWPointer tmp(this); + NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);) + if (tmp.scaled_iv_plus_offset(n->in(1))) { - if (tmp._invar == NULL) { + if (tmp._invar == NULL || _slp->do_vector_loop()) { int mult = 1 << n->in(2)->get_int(); _scale = tmp._scale * mult; _offset += tmp._offset * mult; + NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, mult);) return true; } } } } + NOT_PRODUCT(_tracer.scaled_iv_10(n);) return false; } @@ -2865,9 +2980,13 @@ // Match: offset is (k [+/- invariant]) // where k maybe zero and invariant is optional, but not both. bool SWPointer::offset_plus_k(Node* n, bool negate) { + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.offset_plus_k_1(n);) + int opc = n->Opcode(); if (opc == Op_ConI) { _offset += negate ? -(n->get_int()) : n->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);) return true; } else if (opc == Op_ConL) { // Okay if value fits into an int @@ -2876,11 +2995,17 @@ jlong loff = n->get_long(); jint off = (jint)loff; _offset += negate ? 
-off : loff; + NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);) return true; } + NOT_PRODUCT(_tracer.offset_plus_k_4(n);) return false; } - if (_invar != NULL) return false; // already have an invariant + if (_invar != NULL) { // already has an invariant + NOT_PRODUCT(_tracer.offset_plus_k_5(n, _invar);) + return false; + } + if (_analyze_only && (invariant(n) == false)) { _nstack->push(n, _stack_idx++); } @@ -2889,11 +3014,13 @@ _negate_invar = negate; _invar = n->in(1); _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);) return true; } else if (n->in(1)->is_Con() && invariant(n->in(2))) { _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); _negate_invar = negate; _invar = n->in(2); + NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);) return true; } } @@ -2902,19 +3029,24 @@ _negate_invar = negate; _invar = n->in(1); _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);) return true; } else if (n->in(1)->is_Con() && invariant(n->in(2))) { _offset += negate ? 
-(n->in(1)->get_int()) : n->in(1)->get_int(); _negate_invar = !negate; _invar = n->in(2); + NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);) return true; } } if (invariant(n)) { _negate_invar = negate; _invar = n; + NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);) return true; } + + NOT_PRODUCT(_tracer.offset_plus_k_11(n);) return false; } @@ -2930,6 +3062,287 @@ #endif } +//----------------------------tracing------------------------ +#ifndef PRODUCT +void SWPointer::Tracer::print_depth() { + for (int ii = 0; ii<_depth; ++ii) tty->print(" "); +} + +void SWPointer::Tracer::ctor_1 (Node* mem) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx); mem->dump(); + } +} + +void SWPointer::Tracer::ctor_2(Node* adr) { + if(_slp->is_trace_alignment()) { + //store_depth(); + inc_depth(); + print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump(); + inc_depth(); + print_depth(); tty->print(" %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump(); + } +} + +void SWPointer::Tracer::ctor_3(Node* adr, int i) { + if(_slp->is_trace_alignment()) { + inc_depth(); + Node* offset = adr->in(AddPNode::Offset); + print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump(); + } +} + +void SWPointer::Tracer::ctor_4(Node* adr, int i) { + if(_slp->is_trace_alignment()) { + inc_depth(); + print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i); adr->dump(); + } +} + +void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) { + if(_slp->is_trace_alignment()) { + inc_depth(); + if (base == adr) { + print_depth(); tty->print_cr(" \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i); + } else if (!adr->is_AddP()) { + print_depth(); tty->print_cr(" \\ %d (adr) is NOT Addp 
SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i); + } + } +} + +void SWPointer::Tracer::ctor_6(Node* mem) { + if(_slp->is_trace_alignment()) { + //restore_depth(); + print_depth(); tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx); + } +} + +void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) { + if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) { + int is_member = _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)); + int in_bb = _slp->in_bb(n); + print_depth(); tty->print(" \\ "); tty->print_cr(" %d SWPointer::invariant conditions differ: n_c %d", n->_idx, n_c->_idx); + print_depth(); tty->print(" \\ "); tty->print_cr("is_member %d, in_bb %d", is_member, in_bb); + print_depth(); tty->print(" \\ "); n->dump(); + print_depth(); tty->print(" \\ "); n_c->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx); + n->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void 
SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: FAILED", n->_idx); + } +} + +void SWPointer::Tracer::scaled_iv_1(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx); + print_depth(); 
tty->print_cr(" \\ SWPointer::scaled_iv: _scale (%d) != 0", scale); + } +} + +void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale); + } +} + +void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_7(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx); + print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx); + inc_depth(); inc_depth(); + print_depth(); n->in(1)->dump(); + dec_depth(); dec_depth(); + } +} + +void 
SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp->print(); + } +} + +void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int _offset, int mult) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, _offset); + print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset, in(2) %d used to get mult = %d: _scale = %d, _offset = %d", + n->in(1)->_idx, n->in(2)->_idx, mult, scale, _offset); + inc_depth(); inc_depth(); + print_depth(); n->in(1)->dump(); + print_depth(); n->in(2)->dump(); + dec_depth(); dec_depth(); + } +} + +void SWPointer::Tracer::scaled_iv_10(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx); + } +} + +void SWPointer::Tracer::offset_plus_k_1(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset); + } +} + +void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset); + } +} + +void SWPointer::Tracer::offset_plus_k_4(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); + print_depth(); tty->print_cr(" \\ " JLONG_FORMAT " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long()); + } +} + +void SWPointer::Tracer::offset_plus_k_5(Node* n, 
Node* _invar) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void 
SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print_cr(" \\ %d SWPointer::offset_plus_k: is invariant", n->_idx); + } +} + +void SWPointer::Tracer::offset_plus_k_11(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); + } +} + +#endif // ========================= OrderedPair ===================== const OrderedPair OrderedPair::initial; @@ -3076,13 +3489,20 @@ // // --------------------------------- vectorization/simd ----------------------------------- // +bool SuperWord::same_origin_idx(Node* a, Node* b) const { + return a != NULL && b != NULL && _clone_map.same_idx(a->_idx, b->_idx); +} +bool SuperWord::same_generation(Node* a, Node* b) const { + return a != NULL && b != NULL && _clone_map.same_gen(a->_idx, b->_idx); +} + Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { assert(in_bb(ld), "must be in block"); if (_clone_map.gen(ld->_idx) == _ii_first) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(ld->_idx)=%d", - _clone_map.gen(ld->_idx)); + _clone_map.gen(ld->_idx)); } #endif return NULL; //we think that any 
ld in the first gen being vectorizable @@ -3094,18 +3514,18 @@ #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep input node %d to load %d has no other outputs and edge mem->load cannot be removed", - mem->_idx, ld->_idx); + mem->_idx, ld->_idx); ld->dump(); mem->dump(); } #endif return NULL; } - if (!in_bb(mem) || _clone_map.gen(mem->_idx) == _clone_map.gen(ld->_idx)) { + if (!in_bb(mem) || same_generation(mem, ld)) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d", - _clone_map.gen(mem->_idx)); + _clone_map.gen(mem->_idx)); } #endif return NULL; // does not depend on loop volatile node or depends on the same generation @@ -3136,7 +3556,7 @@ #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep load %d is not vectorizable node, its phi %d is not _mem_slice_head", - ld->_idx, phi->_idx); + ld->_idx, phi->_idx); ld->dump(); phi->dump(); } @@ -3151,11 +3571,11 @@ Node* SuperWord::first_node(Node* nd) { for (int ii = 0; ii < _iteration_first.length(); ii++) { Node* nnn = _iteration_first.at(ii); - if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) { + if (same_origin_idx(nnn, nd)) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)", - nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx)); + nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx)); } #endif return nnn; @@ -3165,7 +3585,7 @@ #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::first_node: did not find first iteration node for %d (_clone_map.idx(nd->_idx)=%d)", - nd->_idx, _clone_map.idx(nd->_idx)); + nd->_idx, _clone_map.idx(nd->_idx)); } #endif return 0; @@ -3174,11 +3594,11 @@ Node* SuperWord::last_node(Node* nd) { for (int ii = 0; ii < _iteration_last.length(); ii++) { Node* nnn = _iteration_last.at(ii); - if (_clone_map.idx(nnn->_idx) == 
_clone_map.idx(nd->_idx)) { + if (same_origin_idx(nnn, nd)) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d", - _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx)); + _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx)); } #endif return nnn; @@ -3219,9 +3639,11 @@ } else if (_ii_first != _clone_map.gen(ii->_idx)) { #ifndef PRODUCT if (TraceSuperWord && Verbose) { - tty->print_cr("SuperWord::mark_generations _ii_first error - found different generations in two nodes "); + tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first); ii->dump(); - ii_err->dump(); + if (ii_err!= 0) { + ii_err->dump(); + } } #endif return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized @@ -3252,8 +3674,7 @@ } // building order of iterations - assert(_ii_order.length() == 0, "should be empty"); - if (ii_err != 0) { + if (_ii_order.length() == 0 && ii_err != 0) { assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb"); Node* nd = ii_err; while(_clone_map.gen(nd->_idx) != _ii_last) { @@ -3261,7 +3682,7 @@ bool found = false; for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) { Node* use = nd->fast_out(i); - if (_clone_map.idx(use->_idx) == _clone_map.idx(nd->_idx) && use->as_Store()->in(MemNode::Memory) == nd) { + if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) { found = true; nd = use; break; @@ -3303,7 +3724,7 @@ bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) { assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes"); - assert(_clone_map.idx(gold->_idx) == _clone_map.idx(fix->_idx), "should be clones of the same node"); + assert(same_origin_idx(gold, fix), "should be clones of the same node"); Node* gin1 = gold->in(1); Node* gin2 = gold->in(2); Node* fin1 = fix->in(1); 
@@ -3311,12 +3732,12 @@ bool swapped = false; if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin1)) { - if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin1->_idx) && - _clone_map.idx(gin2->_idx) == _clone_map.idx(fin2->_idx)) { + if (same_origin_idx(gin1, fin1) && + same_origin_idx(gin2, fin2)) { return true; // nothing to fix } - if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin2->_idx) && - _clone_map.idx(gin2->_idx) == _clone_map.idx(fin1->_idx)) { + if (same_origin_idx(gin1, fin2) && + same_origin_idx(gin2, fin1)) { fix->swap_edges(1, 2); swapped = true; } @@ -3364,7 +3785,7 @@ for (int gen = 1; gen < _ii_order.length(); ++gen) { for (int kk = 0; kk < _block.length(); kk++) { Node* clone = _block.at(kk); - if (_clone_map.idx(clone->_idx) == _clone_map.idx(nd->_idx) && + if (same_origin_idx(clone, nd) && _clone_map.gen(clone->_idx) == _ii_order.at(gen)) { if (nd->is_Add() || nd->is_Mul()) { fix_commutative_inputs(nd, clone); @@ -3429,13 +3850,12 @@ if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) { for (int i = 0; i < _block.length(); i++) { Node* ld2 = _block.at(i); - if (ld2->is_Load() && - _clone_map.idx(ld->_idx) == _clone_map.idx(ld2->_idx) && - _clone_map.gen(ld->_idx) != _clone_map.gen(ld2->_idx)) { // <= do not collect the first generation ld + if (ld2->is_Load() && same_origin_idx(ld, ld2) && + !same_generation(ld, ld2)) { // <= do not collect the first generation ld #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)", - ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx); + ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx); } #endif // could not do on-the-fly, since iterator is immutable @@ -3453,7 +3873,7 @@ #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::hoist_loads_in_graph replacing MemNode::Memory(%d) edge in %d with one from %d", - MemNode::Memory, ld->_idx, phi->_idx); + MemNode::Memory, 
ld->_idx, phi->_idx); } #endif _igvn.replace_input_of(ld, MemNode::Memory, phi);
--- a/hotspot/src/share/vm/opto/superword.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/superword.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -203,6 +203,7 @@ // -----------------------------SuperWord--------------------------------- // Transforms scalar operations into packed (superword) operations. class SuperWord : public ResourceObj { + friend class SWPointer; private: PhaseIdealLoop* _phase; Arena* _arena; @@ -247,8 +248,17 @@ PhaseIdealLoop* phase() { return _phase; } IdealLoopTree* lpt() { return _lpt; } PhiNode* iv() { return _iv; } + bool early_return() { return _early_return; } +#ifndef PRODUCT + bool is_debug() { return _vector_loop_debug > 0; } + bool is_trace_alignment() { return (_vector_loop_debug & 2) > 0; } + bool is_trace_mem_slice() { return (_vector_loop_debug & 4) > 0; } + bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; } + bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; } +#endif + bool do_vector_loop() { return _do_vector_loop; } private: IdealLoopTree* _lpt; // Current loop tree node LoopNode* _lp; // Current LoopNode @@ -257,12 +267,14 @@ bool _race_possible; // In cases where SDMU is true bool _early_return; // True if we do not initialize bool _do_vector_loop; // whether to do vectorization/simd style - bool _vector_loop_debug; // provide more printing in debug mode int _num_work_vecs; // Number of non memory vector operations int _num_reductions; // Number of reduction expressions applied int _ii_first; // generation with direct deps from mem phi int _ii_last; // generation with direct deps to mem phi GrowableArray<int> _ii_order; +#ifndef PRODUCT + uintx _vector_loop_debug; // provide more printing in debug mode +#endif // Accessors Arena* arena() { return _arena; } @@ -325,12 +337,20 @@ Node_List* my_pack(Node* n) { return !in_bb(n) ? 
NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } + // CloneMap utilities + bool same_origin_idx(Node* a, Node* b) const; + bool same_generation(Node* a, Node* b) const; + // methods // Extract the superword level parallelism void SLP_extract(); // Find the adjacent memory references and create pack pairs for them. void find_adjacent_refs(); + // Tracing support + #ifndef PRODUCT + void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment); + #endif // Find a memory reference to align the loop induction variable to. MemNode* find_align_to_ref(Node_List &memops); // Calculate loop's iv adjustment for this memory ops. @@ -340,13 +360,13 @@ // rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only) bool hoist_loads_in_graph(); // Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi - // Return false if failed. + // Return NULL if failed Node* find_phi_for_mem_dep(LoadNode* ld); // Return same node but from the first generation. Return 0, if not found Node* first_node(Node* nd); // Return same node as this but from the last generation. 
Return 0, if not found Node* last_node(Node* n); - // Mark nodes belonging to first and last generation, + // Mark nodes belonging to first and last generation // returns first generation index or -1 if vectorization/simd is impossible int mark_generations(); // swapping inputs of commutative instruction (Add or Mul) @@ -483,10 +503,7 @@ IdealLoopTree* lpt() { return _slp->lpt(); } PhiNode* iv() { return _slp->iv(); } // Induction var - bool invariant(Node* n) { - Node *n_c = phase()->get_ctrl(n); - return !lpt()->is_member(phase()->get_loop(n_c)); - } + bool invariant(Node* n); // Match: k*iv + offset bool scaled_iv_plus_offset(Node* n); @@ -545,6 +562,76 @@ static bool comparable(int cmp) { return cmp < NotComparable; } void print(); + // Non-product tracing hooks for the SWPointer address analysis; output goes to tty, indented by the static _depth. +#ifndef PRODUCT + class Tracer { + friend class SuperWord; + friend class SWPointer; + SuperWord* _slp; + static int _depth; + int _depth_save; + void print_depth(); + int depth() const { return _depth; } + void set_depth(int d) { _depth = d; } + void inc_depth() { _depth++;} + void dec_depth() { if (_depth > 0) _depth--;} + void store_depth() {_depth_save = _depth;} + void restore_depth() {_depth = _depth_save;} + + class Depth { + friend class Tracer; + friend class SWPointer; + friend class SuperWord; + Depth() { ++_depth; } + Depth(int x) { _depth = 0; } + ~Depth() { if (_depth > 0) --_depth;} + }; + Tracer (SuperWord* slp) : _slp(slp) {} + + // tracing functions + void ctor_1(Node* mem); + void ctor_2(Node* adr); + void ctor_3(Node* adr, int i); + void ctor_4(Node* adr, int i); + void ctor_5(Node* adr, Node* base, int i); + void ctor_6(Node* mem); + + void invariant_1(Node *n, Node *n_c); + + void scaled_iv_plus_offset_1(Node* n); + void scaled_iv_plus_offset_2(Node* n); + void scaled_iv_plus_offset_3(Node* n); + void scaled_iv_plus_offset_4(Node* n); + void scaled_iv_plus_offset_5(Node* n); + void scaled_iv_plus_offset_6(Node* n); + void scaled_iv_plus_offset_7(Node* n); + void scaled_iv_plus_offset_8(Node* n); + + void 
scaled_iv_1(Node* n); + void scaled_iv_2(Node* n, int scale); + void scaled_iv_3(Node* n, int scale); + void scaled_iv_4(Node* n, int scale); + void scaled_iv_5(Node* n, int scale); + void scaled_iv_6(Node* n, int scale); + void scaled_iv_7(Node* n); + void scaled_iv_8(Node* n, SWPointer* tmp); + void scaled_iv_9(Node* n, int _scale, int _offset, int mult); + void scaled_iv_10(Node* n); + + void offset_plus_k_1(Node* n); + void offset_plus_k_2(Node* n, int _offset); + void offset_plus_k_3(Node* n, int _offset); + void offset_plus_k_4(Node* n); + void offset_plus_k_5(Node* n, Node* _invar); + void offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_11(Node* n); + + } _tracer; // Tracer +#endif };
--- a/hotspot/src/share/vm/opto/type.cpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/type.cpp Fri Jul 17 14:51:28 2015 -0700 @@ -200,8 +200,7 @@ //-----------------------make_from_constant------------------------------------ -const Type* Type::make_from_constant(ciConstant constant, - bool require_constant, bool is_autobox_cache) { +const Type* Type::make_from_constant(ciConstant constant, bool require_constant) { switch (constant.basic_type()) { case T_BOOLEAN: return TypeInt::make(constant.as_boolean()); case T_CHAR: return TypeInt::make(constant.as_char()); @@ -222,14 +221,57 @@ if (oop_constant->is_null_object()) { return Type::get_zero_type(T_OBJECT); } else if (require_constant || oop_constant->should_be_constant()) { - return TypeOopPtr::make_from_constant(oop_constant, require_constant, is_autobox_cache); + return TypeOopPtr::make_from_constant(oop_constant, require_constant); + } + } + case T_ILLEGAL: + // Invalid ciConstant returned due to OutOfMemoryError in the CI + assert(Compile::current()->env()->failing(), "otherwise should not see this"); + return NULL; + } + // Fall through to failure + return NULL; +} + +// Fold a load of a constant (final or stable) field into a Type; returns NULL when the value cannot be used as a compile-time constant. +const Type* Type::make_constant(ciField* field, Node* obj) { + if (!field->is_constant()) return NULL; + + const Type* con_type = NULL; + if (field->is_static()) { + // final static field + con_type = Type::make_from_constant(field->constant_value(), /*require_const=*/true); + if (Compile::current()->eliminate_boxing() && field->is_autobox_cache() && con_type != NULL) { + con_type = con_type->is_aryptr()->cast_to_autobox_cache(true); + } + } else { + // final or stable non-static field + // Treat final non-static fields of trusted classes (classes in + // java.lang.invoke and sun.invoke packages and subpackages) as + // compile time constants. 
+ const TypeOopPtr* oop_ptr = obj->is_Con() ? obj->bottom_type()->isa_oopptr() : NULL; + if (oop_ptr != NULL) { // isa_oopptr() is NULL for a constant that is not an oop pointer; do not dereference it + ciObject* constant_oop = oop_ptr->const_oop(); + ciConstant constant = field->constant_value_of(constant_oop); + con_type = Type::make_from_constant(constant, /*require_const=*/true); + } + } + if (FoldStableValues && field->is_stable() && con_type != NULL) { + if (con_type->is_zero_type()) { + return NULL; // the field hasn't been initialized yet + } else if (con_type->isa_oopptr()) { + const Type* stable_type = Type::get_const_type(field->type()); + if (field->type()->is_array_klass()) { + int stable_dimension = field->type()->as_array_klass()->dimension(); + stable_type = stable_type->is_aryptr()->cast_to_stable(true, stable_dimension); + } + if (stable_type != NULL) { + con_type = con_type->join_speculative(stable_type); } } } - // Fall through to failure - return NULL; -} - + return con_type; +} //------------------------------make------------------------------------------- // Create a simple Type, with default empty symbol sets. Then hashcons it @@ -3009,9 +3051,7 @@ //------------------------------make_from_constant----------------------------- // Make a java pointer from an oop constant -const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o, - bool require_constant, - bool is_autobox_cache) { +const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o, bool require_constant) { assert(!o->is_null_object(), "null object not yet handled here."); ciKlass* klass = o->klass(); if (klass->is_instance_klass()) { @@ -3026,10 +3066,6 @@ // Element is an object array. Recursively call ourself. const TypeOopPtr *etype = TypeOopPtr::make_from_klass_raw(klass->as_obj_array_klass()->element_klass()); - if (is_autobox_cache) { - // The pointers in the autobox arrays are always non-null. 
- etype = etype->cast_to_ptr_type(TypePtr::NotNull)->is_oopptr(); - } const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length())); // We used to pass NotNull in here, asserting that the sub-arrays // are all not-null. This is not true in generally, as code can @@ -3039,7 +3075,7 @@ } else if (!o->should_be_constant()) { return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0); } - const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, InlineDepthBottom, is_autobox_cache); + const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0); return arr; } else if (klass->is_type_array_klass()) { // Element is an typeArray @@ -3940,7 +3976,6 @@ return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth); } - //------------------------------cast_to_stable--------------------------------- const TypeAryPtr* TypeAryPtr::cast_to_stable(bool stable, int stable_dimension) const { if (stable_dimension <= 0 || (stable_dimension == 1 && stable == this->is_stable())) @@ -3969,6 +4004,18 @@ return dim; } +//----------------------cast_to_autobox_cache----------------------------------- +const TypeAryPtr* TypeAryPtr::cast_to_autobox_cache(bool cache) const { + if (is_autobox_cache() == cache) return this; + const TypeOopPtr* etype = elem()->make_oopptr(); + if (etype == NULL) return this; + // The pointers in the autobox arrays are always non-null. + // NOTE: the elements are cast to NotNull for either value of 'cache'; 
only the is_autobox_cache flag handed to make() below differs. + etype = etype->cast_to_ptr_type(TypePtr::NotNull)->is_oopptr(); + const TypeAry* new_ary = TypeAry::make(etype, size(), is_stable()); + return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth, cache); +} + //------------------------------eq--------------------------------------------- // Structural equality check for Type representations bool TypeAryPtr::eq( const Type *t ) const { @@ -4455,7 +4502,7 @@ // TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple // constants bool TypeMetadataPtr::singleton(void) const { - // detune optimizer to not generate constant metadta + constant offset as a constant! + // detune optimizer to not generate constant metadata + constant offset as a constant! // TopPTR, Null, AnyNull, Constant are all singletons return (_offset == 0) && !below_centerline(_ptr); }
--- a/hotspot/src/share/vm/opto/type.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/opto/type.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -412,8 +412,9 @@ static const Type* get_typeflow_type(ciType* type); static const Type* make_from_constant(ciConstant constant, - bool require_constant = false, - bool is_autobox_cache = false); + bool require_constant = false); + // Type of a constant field's value (read from the constant receiver 'obj' when the field is non-static); NULL if it cannot be folded. + static const Type* make_constant(ciField* field, Node* obj); // Speculative type helper methods. See TypePtr. virtual const TypePtr* speculative() const { return NULL; } @@ -973,8 +974,7 @@ // may return a non-singleton type. // If require_constant, produce a NULL if a singleton is not possible. static const TypeOopPtr* make_from_constant(ciObject* o, - bool require_constant = false, - bool not_null_elements = false); + bool require_constant = false); // Make a generic (unclassed) pointer to an oop. static const TypeOopPtr* make(PTR ptr, int offset, int instance_id, @@ -1184,6 +1184,8 @@ const TypeAryPtr* cast_to_stable(bool stable, int stable_dimension = 1) const; int stable_dimension() const; + const TypeAryPtr* cast_to_autobox_cache(bool cache) const; + // Convenience common pre-built types. static const TypeAryPtr *RANGE; static const TypeAryPtr *OOPS; @@ -1674,12 +1676,12 @@ inline const TypePtr* Type::make_ptr() const { return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype() : - ((_base == NarrowKlass) ? is_narrowklass()->get_ptrtype() : - (isa_ptr() ? is_ptr() : NULL)); + ((_base == NarrowKlass) ? is_narrowklass()->get_ptrtype() : + isa_ptr()); } inline const TypeOopPtr* Type::make_oopptr() const { - return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype()->is_oopptr() : is_oopptr(); + return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype()->isa_oopptr() : isa_oopptr(); } inline const TypeNarrowOop* Type::make_narrowoop() const { @@ -1689,7 +1691,7 @@ inline const TypeNarrowKlass* Type::make_narrowklass() const { return (_base == NarrowKlass) ? 
is_narrowklass() : - (isa_ptr() ? TypeNarrowKlass::make(is_ptr()) : NULL); + (isa_ptr() ? TypeNarrowKlass::make(is_ptr()) : NULL); } inline bool Type::is_floatingpoint() const {
--- a/hotspot/src/share/vm/runtime/globals.hpp Fri Jul 17 08:46:52 2015 -0700 +++ b/hotspot/src/share/vm/runtime/globals.hpp Fri Jul 17 14:51:28 2015 -0700 @@ -4127,14 +4127,18 @@ "Use the FP register for holding the frame pointer " \ "and not as a general purpose register.") \ \ - diagnostic(bool, CheckIntrinsics, trueInDebug, \ + diagnostic(bool, CheckIntrinsics, true, \ "When a class C is loaded, check that " \ "(1) all intrinsics defined by the VM for class C are present "\ "in the loaded class file and are marked with the " \ - "@HotSpotIntrinsicCandidate annotation and also that " \ + "@HotSpotIntrinsicCandidate annotation, that " \ "(2) there is an intrinsic registered for all loaded methods " \ "that are annotated with the @HotSpotIntrinsicCandidate " \ - "annotation.") + "annotation, and that " \ + "(3) no orphan methods exist for class C (i.e., methods for " \ + "which the VM declares an intrinsic but that are not declared "\ + "in the loaded class C). " \ + "Check (3) is available only in debug builds.") /* * Macros for factoring of globals
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/unsafe/UnsafeGetConstantField.java Fri Jul 17 14:51:28 2015 -0700 @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+
+/*
+ * @test
+ * @summary tests on constant folding of unsafe get operations
+ * @library /testlibrary /../../test/lib
+ * @run main/bootclasspath -XX:+UnlockDiagnosticVMOptions
+ *                   -Xbatch -XX:-TieredCompilation
+ *                   -XX:+FoldStableValues
+ *                   -XX:+UseUnalignedAccesses
+ *                         java.lang.invoke.UnsafeGetConstantField
+ * @run main/bootclasspath -XX:+UnlockDiagnosticVMOptions
+ *                   -Xbatch -XX:-TieredCompilation
+ *                   -XX:+FoldStableValues
+ *                   -XX:-UseUnalignedAccesses
+ *                         java.lang.invoke.UnsafeGetConstantField
+ */
+package java.lang.invoke;
+
+import jdk.internal.org.objectweb.asm.*;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Utils;
+import sun.misc.Unsafe;
+import static jdk.internal.org.objectweb.asm.Opcodes.*;
+
+/**
+ * Test driver that compares ordinary field reads with the same reads
+ * performed through {@code sun.misc.Unsafe}.
+ *
+ * For every tested combination of field type, access flags, {@code @Stable}
+ * annotation, initial value and Unsafe accessor flavour, a small class
+ * implementing {@link Test} is generated with ASM (see {@link Generator}).
+ * Its two methods read the same field, once directly and once through
+ * Unsafe, and are called 20_000 times each while the results are asserted
+ * to be equal on every iteration.
+ */
+public class UnsafeGetConstantField {
+    // Used to derive the generated class names and the defining class loader.
+    static final Class<?> THIS_CLASS = UnsafeGetConstantField.class;
+
+    // Unsafe instance used by the driver itself (the generated classes cache their own).
+    static final Unsafe U = Utils.getUnsafe();
+
+    /**
+     * Runs the three test groups in order and prints "TEST PASSED" once
+     * all of them have returned without an assertion failure.
+     */
+    public static void main(String[] args) {
+        testUnsafeGetAddress();
+        testUnsafeGetField();
+        testUnsafeGetFieldUnaligned();
+        System.out.println("TEST PASSED");
+    }
+
+    // 16 bytes of native memory allocated at class initialization for the
+    // getAddress test below.
+    // NOTE(review): nothing in this file releases this allocation.
+    static final long nativeAddr = U.allocateMemory(16);
+
+    /**
+     * Stores a known value at {@link #nativeAddr} with {@code putAddress}
+     * and checks 20_000 times that {@link #checkGetAddress()} reads it back.
+     */
+    static void testUnsafeGetAddress() {
+        long cookie = 0x12345678L;
+        U.putAddress(nativeAddr, cookie);
+        for (int i = 0; i < 20_000; i++) {
+            Asserts.assertEquals(checkGetAddress(), cookie);
+        }
+    }
+
+    /**
+     * Reads the value stored at {@link #nativeAddr} via {@code getAddress}.
+     * Marked {@code @DontInline}, presumably so that it is compiled as a
+     * method of its own rather than inlined into the calling loop.
+     */
+    @DontInline
+    static long checkGetAddress() {
+        return U.getAddress(nativeAddr);
+    }
+
+    /**
+     * Runs {@link #runTest} for every {@link JavaType}, each of the four
+     * static/final flag combinations, with and without {@code @Stable},
+     * with and without an explicitly initialized field, and for both the
+     * plain ("") and "Volatile" Unsafe accessors.
+     */
+    static void testUnsafeGetField() {
+        int[] testedFlags = new int[] { 0, ACC_STATIC, ACC_FINAL, (ACC_STATIC | ACC_FINAL) };
+        boolean[] boolValues = new boolean[] { false, true };
+        String[] modes = new String[] { "", "Volatile" };
+
+        for (JavaType t : JavaType.values()) {
+            for (int flags : testedFlags) {
+                for (boolean stable : boolValues) {
+                    for (boolean hasDefaultValue : boolValues) {
+                        for (String suffix : modes) {
+                            runTest(t, flags, stable, hasDefaultValue, suffix);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Same sweep as {@link #testUnsafeGetField()} but restricted to the
+     * S, C, I and J types and using the "Unaligned" Unsafe accessors.
+     */
+    static void testUnsafeGetFieldUnaligned() {
+        JavaType[] types = new JavaType[] { JavaType.S, JavaType.C, JavaType.I, JavaType.J };
+        int[] testedFlags = new int[] { 0, ACC_STATIC, ACC_FINAL, (ACC_STATIC | ACC_FINAL) };
+        boolean[] boolValues = new boolean[] { false, true };
+
+        for (JavaType t : types) {
+            for (int flags : testedFlags) {
+                for (boolean stable : boolValues) {
+                    for (boolean hasDefaultValue : boolValues) {
+                        runTest(t, flags, stable, hasDefaultValue, "Unaligned");
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Generates one test class for the given configuration, logs the
+     * configuration, and asserts 20_000 times that the direct read and the
+     * Unsafe read return equal values.
+     *
+     * @param t               type of the tested field
+     * @param flags           access flags of the tested field
+     * @param stable          whether the field carries the {@code @Stable} annotation
+     * @param hasDefaultValue if true the field is left uninitialized by the generated code
+     * @param postfix         suffix of the Unsafe accessor name ("", "Volatile" or "Unaligned")
+     */
+    static void runTest(JavaType t, int flags, boolean stable, boolean hasDefaultValue, String postfix) {
+        Generator g = new Generator(t, flags, stable, hasDefaultValue, postfix);
+        Test test = g.generate();
+        System.out.printf("type=%s flags=%d stable=%b default=%b post=%s\n",
+                          t.typeName, flags, stable, hasDefaultValue, postfix);
+        // Trigger compilation
+        for (int i = 0; i < 20_000; i++) {
+            Asserts.assertEQ(test.testDirect(), test.testUnsafe());
+        }
+    }
+
+    /**
+     * Interface implemented by every generated class. Both methods return
+     * the value of the tested field (boxed for primitive types):
+     * {@code testDirect} reads it with a field instruction,
+     * {@code testUnsafe} reads it through an Unsafe accessor.
+     */
+    interface Test {
+        Object testDirect();
+        Object testUnsafe();
+    }
+
+    /**
+     * Field types under test. The constant name doubles as the JVM field
+     * descriptor for primitive types (see {@link #desc()}).
+     */
+    enum JavaType {
+        Z("Boolean", true),
+        B("Byte", new Byte((byte)-1)),
+        S("Short", new Short((short)-1)),
+        C("Char", Character.MAX_VALUE),
+        I("Int", -1),
+        J("Long", -1L),
+        F("Float", -1F),
+        D("Double", -1D),
+        L("Object", new Object());
+
+        // Middle part of the Unsafe accessor name: "get" + typeName + suffix.
+        String typeName;
+        // Value loaded into the field when it is explicitly initialized
+        // (for L the generated code creates a fresh Object instead).
+        Object value;
+        // Internal name of value's class; owner of the valueOf call used for boxing.
+        String wrapper;
+        JavaType(String name, Object value) {
+            this.typeName = name;
+            this.value = value;
+            this.wrapper = internalName(value.getClass());
+        }
+
+        /**
+         * Returns the field descriptor: "Ljava/lang/Object;" for L,
+         * otherwise the constant's own name.
+         */
+        String desc() {
+            if (this == JavaType.L) {
+                return "Ljava/lang/Object;";
+            } else {
+                return toString();
+            }
+        }
+    }
+
+    /** Returns the class name with '.' replaced by '/'. */
+    static String internalName(Class cls) {
+        return cls.getName().replace('.', '/');
+    }
+    /** Returns the object type descriptor "L&lt;internal name&gt;;" for the class. */
+    static String descriptor(Class cls) {
+        return String.format("L%s;", internalName(cls));
+    }
+
+    /**
+     * Builds and loads one {@link Test} implementation for a single
+     * configuration (field type, flags, stable, initial value, accessor suffix).
+     *
+     * Sample generated class:
+     * static class T1 implements Test {
+     *   final int f = -1;
+     *   static final long FIELD_OFFSET;
+     *   static final T1 t = new T1();
+     *   static {
+     *     FIELD_OFFSET = U.objectFieldOffset(T1.class.getDeclaredField("f"));
+     *   }
+     *   public Object testDirect() { return t.f; }
+     *   public Object testUnsafe() { return U.getInt(t, FIELD_OFFSET); }
+     * }
+     *
+     * The generated class additionally caches the Unsafe instance in a
+     * static field U and, for a static tested field, stores the result of
+     * Unsafe.staticFieldBase in a static field STATIC_BASE.
+     */
+    static class Generator {
+        // Name of the tested field in every generated class.
+        static final String FIELD_NAME = "f";
+        static final String UNSAFE_NAME = internalName(Unsafe.class);
+        static final String UNSAFE_DESC = descriptor(Unsafe.class);
+
+        final JavaType type;
+        final int flags;
+        final boolean stable;
+        final boolean hasDefaultValue;
+        final String nameSuffix;
+
+        // Internal name and descriptor of the generated class, and the
+        // descriptor of the tested field.
+        final String className;
+        final String classDesc;
+        final String fieldDesc;
+
+        /**
+         * Records the configuration and derives the generated class name,
+         * which encodes the type, suffix, flags, stable and hasDefaultValue
+         * values.
+         */
+        Generator(JavaType t, int flags, boolean stable, boolean hasDefaultValue, String suffix) {
+            this.type = t;
+            this.flags = flags;
+            this.stable = stable;
+            this.hasDefaultValue = hasDefaultValue;
+            this.nameSuffix = suffix;
+
+            fieldDesc = type.desc();
+            className = String.format("%s$Test%s%s__f=%d__s=%b__d=%b", internalName(THIS_CLASS), type.typeName,
+                                      suffix, flags, stable, hasDefaultValue);
+            classDesc = String.format("L%s;", className);
+        }
+
+        /**
+         * Emits the class file: the helper fields, the tested field, the
+         * constructor, testDirect(), testUnsafe() and the static initializer.
+         *
+         * @return the bytes of the generated class
+         */
+        byte[] generateClassFile() {
+            ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_MAXS | ClassWriter.COMPUTE_FRAMES);
+            cw.visit(Opcodes.V1_8, Opcodes.ACC_PUBLIC | Opcodes.ACC_SUPER, className, null, "java/lang/Object",
+                     new String[]{ internalName(Test.class) });
+
+            // Declare fields
+            cw.visitField(ACC_FINAL | ACC_STATIC, "t", classDesc, null, null).visitEnd();
+            cw.visitField(ACC_FINAL | ACC_STATIC, "FIELD_OFFSET", "J", null, null).visitEnd();
+            cw.visitField(ACC_FINAL | ACC_STATIC, "U", UNSAFE_DESC, null, null).visitEnd();
+            if (isStatic()) {
+                cw.visitField(ACC_FINAL | ACC_STATIC, "STATIC_BASE", "Ljava/lang/Object;", null, null).visitEnd();
+            }
+
+            // The tested field itself, declared with the requested flags
+            // and annotated with @Stable when requested.
+            FieldVisitor fv = cw.visitField(flags, FIELD_NAME, fieldDesc, null, null);
+            if (stable) {
+                fv.visitAnnotation(descriptor(Stable.class), true);
+            }
+            fv.visitEnd();
+
+            // Methods
+            { // <init>
+                MethodVisitor mv = cw.visitMethod(0, "<init>", "()V", null, null);
+                mv.visitCode();
+
+                mv.visitVarInsn(ALOAD, 0);
+                mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false);
+                // Instance fields are initialized in the constructor;
+                // static fields are handled in <clinit> below.
+                if (!isStatic()) {
+                    initField(mv);
+                }
+                mv.visitInsn(RETURN);
+
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            { // public Object testDirect() { return t.f; }
+                MethodVisitor mv = cw.visitMethod(ACC_PUBLIC, "testDirect", "()Ljava/lang/Object;", null, null);
+                mv.visitCode();
+
+                getFieldValue(mv);
+                wrapResult(mv);
+                mv.visitInsn(ARETURN);
+
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            { // public Object testUnsafe() { return U.getInt(t, FIELD_OFFSET); }
+                MethodVisitor mv = cw.visitMethod(ACC_PUBLIC, "testUnsafe", "()Ljava/lang/Object;", null, null);
+                mv.visitCode();
+
+                getFieldValueUnsafe(mv);
+                wrapResult(mv);
+                mv.visitInsn(ARETURN);
+
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            { // <clinit>
+                MethodVisitor mv = cw.visitMethod(ACC_STATIC, "<clinit>", "()V", null, null);
+                mv.visitCode();
+
+                // Cache Unsafe instance
+                mv.visitMethodInsn(INVOKESTATIC, UNSAFE_NAME, "getUnsafe", "()"+UNSAFE_DESC, false);
+                mv.visitFieldInsn(PUTSTATIC, className, "U", UNSAFE_DESC);
+
+                // Create test object instance
+                mv.visitTypeInsn(NEW, className);
+                mv.visitInsn(DUP);
+                mv.visitMethodInsn(INVOKESPECIAL, className, "<init>", "()V", false);
+                mv.visitFieldInsn(PUTSTATIC, className, "t", classDesc);
+
+                // Compute field offset
+                getUnsafe(mv);
+                getField(mv);
+                mv.visitMethodInsn(INVOKEVIRTUAL, UNSAFE_NAME, (isStatic() ? "staticFieldOffset" : "objectFieldOffset"),
+                                   "(Ljava/lang/reflect/Field;)J", false);
+                mv.visitFieldInsn(PUTSTATIC, className, "FIELD_OFFSET", "J");
+
+                // Compute base offset for static field
+                if (isStatic()) {
+                    getUnsafe(mv);
+                    getField(mv);
+                    mv.visitMethodInsn(INVOKEVIRTUAL, UNSAFE_NAME, "staticFieldBase", "(Ljava/lang/reflect/Field;)Ljava/lang/Object;", false);
+                    mv.visitFieldInsn(PUTSTATIC, className, "STATIC_BASE", "Ljava/lang/Object;");
+                    initField(mv);
+                }
+
+                mv.visitInsn(RETURN);
+                mv.visitMaxs(0, 0);
+                mv.visitEnd();
+            }
+
+            return cw.toByteArray();
+        }
+
+        /**
+         * Defines the generated class through Unsafe.defineClass, using the
+         * class loader of the enclosing test class, and returns a new
+         * instance of it.
+         *
+         * @throws Error wrapping any exception thrown while instantiating
+         */
+        Test generate() {
+            byte[] classFile = generateClassFile();
+            Class<?> c = U.defineClass(className, classFile, 0, classFile.length, THIS_CLASS.getClassLoader(), null);
+            try {
+                return (Test) c.newInstance();
+            } catch(Exception e) {
+                throw new Error(e);
+            }
+        }
+
+        /** True when the tested field is declared with ACC_STATIC. */
+        boolean isStatic() {
+            return (flags & ACC_STATIC) > 0;
+        }
+        /** True when the tested field is declared with ACC_FINAL (not called within this file). */
+        boolean isFinal() {
+            return (flags & ACC_FINAL) > 0;
+        }
+        /** Emits a load of the generated class's cached Unsafe instance (static field U). */
+        void getUnsafe(MethodVisitor mv) {
+            mv.visitFieldInsn(GETSTATIC, className, "U", UNSAFE_DESC);
+        }
+        /**
+         * Emits code that pushes the generated class constant and the field
+         * name and calls Class.getDeclaredField on them.
+         */
+        void getField(MethodVisitor mv) {
+            mv.visitLdcInsn(Type.getType(classDesc));
+            mv.visitLdcInsn(FIELD_NAME);
+            mv.visitMethodInsn(INVOKEVIRTUAL, "java/lang/Class", "getDeclaredField", "(Ljava/lang/String;)Ljava/lang/reflect/Field;", false);
+        }
+        /**
+         * Emits the direct read of the tested field: GETSTATIC for a static
+         * field, otherwise GETFIELD on the instance stored in static field t.
+         */
+        void getFieldValue(MethodVisitor mv) {
+            if (isStatic()) {
+                mv.visitFieldInsn(GETSTATIC, className, FIELD_NAME, fieldDesc);
+            } else {
+                mv.visitFieldInsn(GETSTATIC, className, "t", classDesc);
+                mv.visitFieldInsn(GETFIELD, className, FIELD_NAME, fieldDesc);
+            }
+        }
+        /**
+         * Emits the Unsafe read of the tested field: pushes U, the base
+         * object (STATIC_BASE for a static field, otherwise t) and
+         * FIELD_OFFSET, then invokes the Unsafe method named
+         * "get" + typeName + nameSuffix.
+         */
+        void getFieldValueUnsafe(MethodVisitor mv) {
+            getUnsafe(mv);
+            if (isStatic()) {
+                mv.visitFieldInsn(GETSTATIC, className, "STATIC_BASE", "Ljava/lang/Object;");
+            } else {
+                mv.visitFieldInsn(GETSTATIC, className, "t", classDesc);
+            }
+            mv.visitFieldInsn(GETSTATIC, className, "FIELD_OFFSET", "J");
+            String name = "get" + type.typeName + nameSuffix;
+            mv.visitMethodInsn(INVOKEVIRTUAL, UNSAFE_NAME, name, "(Ljava/lang/Object;J)" + type.desc(), false);
+        }
+        /**
+         * Emits a call to the wrapper class's static valueOf for every type
+         * except L, so that the test methods can return Object.
+         */
+        void wrapResult(MethodVisitor mv) {
+            if (type != JavaType.L) {
+                String desc = String.format("(%s)L%s;", type.desc(), type.wrapper);
+                mv.visitMethodInsn(INVOKESTATIC, type.wrapper, "valueOf", desc, false);
+            }
+        }
+        /**
+         * Emits the initialization of the tested field unless
+         * hasDefaultValue is set: pushes "this" for an instance field,
+         * pushes the value (a new Object for L, otherwise type.value as a
+         * constant) and stores it with PUTSTATIC or PUTFIELD.
+         */
+        void initField(MethodVisitor mv) {
+            if (hasDefaultValue) {
+                return; // Nothing to do
+            }
+            if (!isStatic()) {
+                mv.visitVarInsn(ALOAD, 0);
+            }
+            switch (type) {
+                case L: {
+                    mv.visitTypeInsn(NEW, "java/lang/Object");
+                    mv.visitInsn(DUP);
+                    mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false);
+
+                    break;
+                }
+                default: {
+                    mv.visitLdcInsn(type.value);
+                    break;
+                }
+            }
+            mv.visitFieldInsn((isStatic() ? PUTSTATIC : PUTFIELD), className, FIELD_NAME, fieldDesc);
+        }
+    }
+}