--- a/hotspot/make/aix/makefiles/adlc.make Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/make/aix/makefiles/adlc.make Fri Apr 03 11:41:01 2015 -0700
@@ -1,5 +1,5 @@
#
-# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -140,13 +140,7 @@
# Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO.
ADLCFLAGS += -q -T
-# Normally, debugging is done directly on the ad_<arch>*.cpp files.
-# But -g will put #line directives in those files pointing back to <arch>.ad.
-# Some builds of gcc 3.2 have a bug that gets tickled by the extra #line directives
-# so skip it for 3.2 and ealier.
-ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
ADLCFLAGS += -g
-endif
ifdef LP64
ADLCFLAGS += -D_LP64
--- a/hotspot/make/aix/makefiles/ppc64.make Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/make/aix/makefiles/ppc64.make Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
#
-# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2012, 2013 SAP AG. All rights reserved.
+# Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, 2015 SAP AG. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -71,9 +71,6 @@
OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
-# xlc 10.01 parameters for ipa compile.
-QIPA_COMPILE=$(if $(CXX_IS_V10),-qipa)
-
# Xlc 10.1 parameters for aggressive optimization:
# - qhot=level=1: Most aggressive loop optimizations.
# - qignerrno: Assume errno is not modified by system calls.
@@ -88,7 +85,7 @@
OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline
# Set all the xlC V10.1 options here.
-OPT_CFLAGS += $(QIPA_COMPILE) $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
+OPT_CFLAGS += $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
export OBJECT_MODE=64
--- a/hotspot/make/aix/makefiles/xlc.make Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/make/aix/makefiles/xlc.make Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
#
-# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012, 2013 SAP. All rights reserved.
+# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, 2015 SAP. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -34,13 +34,17 @@
AS = $(CC) -c
-# get xlc version
-CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | sed -n 's/.*Version: \([0-9.]*\)/\1/p')
+# Get the xlc version, which is reported as VV.RR.MMMM.LLLL where 'VV' is the
+# version, 'RR' is the release, 'MMMM' is the modification and 'LLLL' is the level.
+# We only keep 'VV', 'RR' and 'LLLL' to avoid integer overflows in the shell when
+# comparing version numbers (some shells only support 32-bit integer compares!).
+CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | \
+ sed -n 's/.*Version: \([0-9]\{2\}\).\([0-9]\{2\}\).[0-9]\{4\}.\([0-9]\{4\}\)/\1\2\3/p')
# xlc 08.00.0000.0023 and higher supports -qtune=balanced
-CXX_SUPPORTS_BALANCED_TUNING=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 080000000023 ] ; then echo "true" ; fi)
+CXX_SUPPORTS_BALANCED_TUNING := $(shell if [ $(CXX_VERSION) -ge 08000023 ] ; then echo "true" ; fi)
# xlc 10.01 is used with aggressive optimizations to boost performance
-CXX_IS_V10=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 100100000000 ] ; then echo "true" ; fi)
+CXX_IS_V10 := $(shell if [ $(CXX_VERSION) -ge 10010000 ] ; then echo "true" ; fi)
# check for precompiled headers support
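For example, an xlc banner reporting "Version: 08.00.0000.0023" now yields CXX_VERSION=08000023 (VV=08, RR=00, LLLL=0023), which the -ge tests compare directly against 08000023 and 10010000; unlike the old 12-digit form 080000000023, the 8-digit value stays safely inside a 32-bit integer.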
--- a/hotspot/make/build.sh Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/make/build.sh Fri Apr 03 11:41:01 2015 -0700
@@ -40,7 +40,7 @@
exit 1
fi
-if [ "${JAVA_HOME-}" = "" -o ! -d "${JAVA_HOME-}" -o ! -d ${JAVA_HOME-}/jre/lib/ ]; then
+if [ "${JAVA_HOME-}" = "" -o ! -d "${JAVA_HOME-}" ]; then
echo "JAVA_HOME needs to be set to a valid JDK path"
echo "JAVA_HOME: ${JAVA_HOME-}"
exit 1
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Fri Apr 03 11:41:01 2015 -0700
@@ -1818,6 +1818,8 @@
case INDINDEXSCALEDI2L:
case INDINDEXSCALEDOFFSETI2LN:
case INDINDEXSCALEDI2LN:
+ case INDINDEXOFFSETI2L:
+ case INDINDEXOFFSETI2LN:
scale = Address::sxtw(size);
break;
default:
@@ -4264,6 +4266,20 @@
%}
%}
+operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP (AddP reg (ConvI2L ireg)) off);
+ op_cost(INSN_COST);
+ format %{ "$reg, $ireg, $off I2L" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index($ireg);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
@@ -4324,7 +4340,7 @@
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg off);
- op_cost(INSN_COST);
+ op_cost(0);
format %{ "[$reg, $off]" %}
interface(MEMORY_INTER) %{
base($reg);
@@ -4394,6 +4410,21 @@
%}
%}
+operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
+%{
+ predicate(Universe::narrow_oop_shift() == 0);
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
+ op_cost(INSN_COST);
+ format %{ "$reg, $ireg, $off I2L\t# narrow" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index($ireg);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
@@ -4656,8 +4687,8 @@
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
-opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
- indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
+opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
+ indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
@@ -7523,7 +7554,7 @@
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
match(Set dst (AddP src1 (ConvI2L src2)));
- ins_cost(INSN_COST);
+ ins_cost(1.9 * INSN_COST);
format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
ins_encode %{
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -193,6 +193,11 @@
}
}
+ // This machine allows unaligned memory accesses
+ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+ }
+
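The UseUnalignedAccesses ergonomics follow one pattern across the ports touched in this change: platforms that handle misaligned accesses (aarch64 here, x86 further down) switch the default to true only if the user has not set the flag, while platforms that do not (ppc, sparc, zero below) print a warning if the flag was set explicitly and force it back to false via FLAG_SET_DEFAULT.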
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;
--- a/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -466,7 +466,7 @@
strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH
const char* mh_reg_name = has_mh ? "R23_method_handle" : "G23";
tty->print_cr("MH %s %s="INTPTR_FORMAT " sp=" INTPTR_FORMAT,
- adaptername, mh_reg_name, (intptr_t) mh, entry_sp);
+ adaptername, mh_reg_name, p2i(mh), p2i(entry_sp));
if (Verbose) {
tty->print_cr("Registers:");
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -172,6 +172,12 @@
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ // This machine does not allow unaligned memory accesses
+ if (UseUnalignedAccesses) {
+ if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
+ warning("Unaligned memory access is not available on this CPU");
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+ }
}
void VM_Version::print_features() {
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -356,6 +356,13 @@
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
+ // This machine does not allow unaligned memory accesses
+ if (UseUnalignedAccesses) {
+ if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
+ warning("Unaligned memory access is not available on this CPU");
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+ }
+
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose) {
tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -3359,6 +3359,20 @@
// Integer vector arithmetic
+void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+ emit_int8(0x01);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+ emit_int8(0x02);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::paddb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
@@ -3379,6 +3393,20 @@
emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
}
+void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse3(), ""));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_int8(0x01);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse3(), ""));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_int8(0x02);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
@@ -3804,6 +3832,17 @@
emit_int8(0x01);
}
+void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ bool vector256 = true;
+ int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+ emit_int8(0x19);
+ emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 128 bits
+  // 0x01 - extract from upper 128 bits
+ emit_int8(0x01);
+}
+
void Assembler::vextractf128h(Address dst, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -1777,6 +1777,12 @@
void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ // Add horizontal packed integers
+ void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void phaddw(XMMRegister dst, XMMRegister src);
+ void phaddd(XMMRegister dst, XMMRegister src);
+
// Add packed integers
void paddb(XMMRegister dst, XMMRegister src);
void paddw(XMMRegister dst, XMMRegister src);
@@ -1869,6 +1875,7 @@
// Copy low 128bit into high 128bit of YMM registers.
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vextractf128h(XMMRegister dst, XMMRegister src);
// Load/store high 128bit of YMM registers which does not destroy other half.
void vinsertf128h(XMMRegister dst, Address src);
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -980,6 +980,11 @@
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
+ // This machine allows unaligned memory accesses
+ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+ }
+
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per core: %u",
--- a/hotspot/src/cpu/x86/vm/x86.ad Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/x86.ad Fri Apr 03 11:41:01 2015 -0700
@@ -490,7 +490,7 @@
class NativeJump;
class CallStubImpl {
-
+
//--------------------------------------------------------------
//---< Used for optimization in Compile::shorten_branches >---
//--------------------------------------------------------------
@@ -500,9 +500,9 @@
static uint size_call_trampoline() {
return 0; // no call trampolines on this platform
}
-
+
// number of relocations needed by a call trampoline stub
- static uint reloc_call_trampoline() {
+ static uint reloc_call_trampoline() {
return 0; // no call trampolines on this platform
}
};
@@ -623,6 +623,22 @@
if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
return false;
break;
+ case Op_AddReductionVL:
+ if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
+ return false;
+ case Op_AddReductionVI:
+ if (UseSSE < 3) // requires at least SSE3
+ return false;
+ case Op_MulReductionVI:
+ if (UseSSE < 4) // requires at least SSE4
+ return false;
+ case Op_AddReductionVF:
+ case Op_AddReductionVD:
+ case Op_MulReductionVF:
+ case Op_MulReductionVD:
+ if (UseSSE < 1) // requires at least SSE
+ return false;
+ break;
case Op_CompareAndSwapL:
#ifdef _LP64
case Op_CompareAndSwapP:
@@ -2532,6 +2548,574 @@
ins_pipe( fpu_reg_reg );
%}
+// ====================REDUCTION ARITHMETIC=======================================
+
+instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE > 2 && UseAVX == 0);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp2, TEMP tmp);
+ format %{ "movdqu $tmp2,$src2\n\t"
+ "phaddd $tmp2,$tmp2\n\t"
+ "movd $tmp,$src1\n\t"
+ "paddd $tmp,$tmp2\n\t"
+ "movd $dst,$tmp\t! add reduction2I" %}
+ ins_encode %{
+ __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdl($tmp$$XMMRegister, $src1$$Register);
+ __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdl($dst$$Register, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vphaddd $tmp,$src2,$src2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! add reduction2I" %}
+ ins_encode %{
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE > 2 && UseAVX == 0);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp2, TEMP tmp);
+ format %{ "movdqu $tmp2,$src2\n\t"
+ "phaddd $tmp2,$tmp2\n\t"
+ "phaddd $tmp2,$tmp2\n\t"
+ "movd $tmp,$src1\n\t"
+ "paddd $tmp,$tmp2\n\t"
+ "movd $dst,$tmp\t! add reduction4I" %}
+ ins_encode %{
+ __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
+ __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdl($tmp$$XMMRegister, $src1$$Register);
+ __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdl($dst$$Register, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vphaddd $tmp,$src2,$src2\n\t"
+ "vphaddd $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! add reduction4I" %}
+ ins_encode %{
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
+ __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vphaddd $tmp,$src2,$src2\n\t"
+ "vphaddd $tmp,$tmp,$tmp2\n\t"
+ "vextractf128 $tmp2,$tmp\n\t"
+ "vpaddd $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! add reduction8I" %}
+ ins_encode %{
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true);
+ __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true);
+ __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE >= 1 && UseAVX == 0);
+ match(Set dst (AddReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "movdqu $tmp,$src1\n\t"
+ "addss $tmp,$src2\n\t"
+ "pshufd $tmp2,$src2,0x01\n\t"
+ "addss $tmp,$tmp2\n\t"
+ "movdqu $dst,$tmp\t! add reduction2F" %}
+ ins_encode %{
+ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVF src1 src2));
+ effect(TEMP tmp2, TEMP tmp);
+ format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %}
+ ins_encode %{
+ __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE >= 1 && UseAVX == 0);
+ match(Set dst (AddReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "movdqu $tmp,$src1\n\t"
+ "addss $tmp,$src2\n\t"
+ "pshufd $tmp2,$src2,0x01\n\t"
+ "addss $tmp,$tmp2\n\t"
+ "pshufd $tmp2,$src2,0x02\n\t"
+ "addss $tmp,$tmp2\n\t"
+ "pshufd $tmp2,$src2,0x03\n\t"
+ "addss $tmp,$tmp2\n\t"
+ "movdqu $dst,$tmp\t! add reduction4F" %}
+ ins_encode %{
+ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+ "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %}
+ ins_encode %{
+ __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf128 $tmp3,$src2\n\t"
+ "vaddss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %}
+ ins_encode %{
+ __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+ predicate(UseSSE >= 1 && UseAVX == 0);
+ match(Set dst (AddReductionVD src1 src2));
+ effect(TEMP tmp, TEMP dst);
+ format %{ "movdqu $tmp,$src1\n\t"
+ "addsd $tmp,$src2\n\t"
+ "pshufd $dst,$src2,0xE\n\t"
+ "addsd $dst,$tmp\t! add reduction2D" %}
+ ins_encode %{
+ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVD src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
+ "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %}
+ ins_encode %{
+ __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+ predicate(UseAVX > 0);
+ match(Set dst (AddReductionVD src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf128 $tmp3,$src2\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0xE\n\t"
+ "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
+ ins_encode %{
+ __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE > 3 && UseAVX == 0);
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0x1\n\t"
+ "pmulld $tmp2,$src2\n\t"
+ "movd $tmp,$src1\n\t"
+ "pmulld $tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! mul reduction2I" %}
+ ins_encode %{
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ movdl($tmp$$XMMRegister, $src1$$Register);
+ __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0x1\n\t"
+ "vpmulld $tmp,$src2,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpmulld $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction2I" %}
+ ins_encode %{
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE > 3 && UseAVX == 0);
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0xE\n\t"
+ "pmulld $tmp2,$src2\n\t"
+ "pshufd $tmp,$tmp2,0x1\n\t"
+ "pmulld $tmp2,$tmp\n\t"
+ "movd $tmp,$src1\n\t"
+ "pmulld $tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! mul reduction4I" %}
+ ins_encode %{
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
+ __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ movdl($tmp$$XMMRegister, $src1$$Register);
+ __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0xE\n\t"
+ "vpmulld $tmp,$src2,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpmulld $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction4I" %}
+ ins_encode %{
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vextractf128 $tmp,$src2\n\t"
+ "vpmulld $tmp,$tmp,$src2\n\t"
+ "pshufd $tmp2,$tmp,0xE\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpmulld $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction8I" %}
+ ins_encode %{
+ __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE >= 1 && UseAVX == 0);
+ match(Set dst (MulReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "movdqu $tmp,$src1\n\t"
+ "mulss $tmp,$src2\n\t"
+ "pshufd $tmp2,$src2,0x01\n\t"
+ "mulss $tmp,$tmp2\n\t"
+             "movdqu  $dst,$tmp\t! mul reduction2F" %}
+ ins_encode %{
+ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+            "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
+ ins_encode %{
+ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseSSE >= 1 && UseAVX == 0);
+ match(Set dst (MulReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "movdqu $tmp,$src1\n\t"
+ "mulss $tmp,$src2\n\t"
+ "pshufd $tmp2,$src2,0x01\n\t"
+ "mulss $tmp,$tmp2\n\t"
+ "pshufd $tmp2,$src2,0x02\n\t"
+ "mulss $tmp,$tmp2\n\t"
+ "pshufd $tmp2,$src2,0x03\n\t"
+ "mulss $tmp,$tmp2\n\t"
+             "movdqu  $dst,$tmp\t! mul reduction4F" %}
+ ins_encode %{
+ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+            "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
+ ins_encode %{
+ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf128 $tmp3,$src2\n\t"
+ "vmulss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %}
+ ins_encode %{
+ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+ predicate(UseSSE >= 1 && UseAVX == 0);
+ match(Set dst (MulReductionVD src1 src2));
+ effect(TEMP tmp, TEMP dst);
+ format %{ "movdqu $tmp,$src1\n\t"
+ "mulsd $tmp,$src2\n\t"
+ "pshufd $dst,$src2,0xE\n\t"
+             "mulsd   $dst,$tmp\t! mul reduction2D" %}
+ ins_encode %{
+ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVD src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
+ "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %}
+ ins_encode %{
+ __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+ predicate(UseAVX > 0);
+ match(Set dst (MulReductionVD src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf128 $tmp3,$src2\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0xE\n\t"
+ "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
+ ins_encode %{
+ __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
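All of the reduction rules above implement the same scalar recurrence, just unrolled across the SSE/AVX lanes of src2. A minimal sketch of the intended semantics, inferred from the instruction sequences rather than taken from the patch itself:

  // dst = src1 combined with a fold over every lane of src2
  static int add_reduction_vi(int src1, const int* lanes, int n) {
    int acc = src1;
    for (int i = 0; i < n; i++) acc += lanes[i];   // phaddd/vphaddd collapse lanes pairwise
    return acc;
  }
  // MulReductionVI is the same with '*' (pmulld/vpmulld); the F/D variants walk the
  // lanes with pshufd and combine them with addss/addsd or mulss/mulsd.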
// ====================VECTOR ARITHMETIC=======================================
// --------------------------------- ADD --------------------------------------
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Fri Apr 03 11:41:01 2015 -0700
@@ -3604,6 +3604,23 @@
%}
%}
+// Indirect Memory Plus Positive Index Register Plus Offset Operand
+operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
+ match(AddP (AddP reg (ConvI2L idx)) off);
+
+ op_cost(10);
+ format %{"[$reg + $off + $idx]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index($idx);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
@@ -3755,6 +3772,23 @@
%}
%}
+// Indirect Memory Plus Positive Index Register Plus Offset Operand
+operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
+ match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
+
+ op_cost(10);
+ format %{"[$reg + $off + $idx]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index($idx);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
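Both new operands hinge on the predicate n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0: the ConvI2L'd index must have a non-negative lower type bound, so it can be used directly as the index register in [base + disp + index] without extra sign handling. A rough sketch of the access shape this is meant to catch (illustrative only; the helper and its names are invented):

  long load_at(char* base, int i) {          // i proven >= 0, e.g. by an earlier range check
    return *(long*)(base + 16 + (long)i);    // AddP (AddP base (ConvI2L i)) 16  ->  [base + 16 + i]
  }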
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
@@ -3946,11 +3980,11 @@
// case of this is memory operands.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
- indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
+ indIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
indCompressedOopOffset,
indirectNarrow, indOffset8Narrow, indOffset32Narrow,
indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
- indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
+ indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
@@ -4984,6 +5018,17 @@
ins_pipe(ialu_reg_reg_fat);
%}
+instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
+%{
+ match(Set dst mem);
+
+ ins_cost(110);
+ format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
+ opcode(0x8D);
+ ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+ ins_pipe(ialu_reg_reg_fat);
+%}
+
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
match(Set dst mem);
@@ -5068,6 +5113,18 @@
ins_pipe(ialu_reg_reg_fat);
%}
+instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
+%{
+ predicate(Universe::narrow_oop_shift() == 0);
+ match(Set dst mem);
+
+ ins_cost(110);
+ format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
+ opcode(0x8D);
+ ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+ ins_pipe(ialu_reg_reg_fat);
+%}
+
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
--- a/hotspot/src/cpu/zero/vm/vm_version_zero.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/cpu/zero/vm/vm_version_zero.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -30,4 +30,11 @@
#include "runtime/stubCodeGenerator.hpp"
#include "vm_version_zero.hpp"
-// This file is intentionally empty
+
+void VM_Version::initialize() {
+ // This machine does not allow unaligned memory accesses
+ if (! FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+ warning("Unaligned memory access is not available on this CPU");
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+ }
+}
--- a/hotspot/src/os/aix/vm/attachListener_aix.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/attachListener_aix.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -144,6 +144,10 @@
}
char* next() {
if (*_pos == '\0') {
+ if (_pos < _end) {
+ _pos += 1;
+ }
+
return NULL;
}
char* res = _pos;
@@ -214,6 +218,7 @@
// bind socket
struct sockaddr_un addr;
+ memset((void *)&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
strcpy(addr.sun_path, initial_path);
::unlink(initial_path);
--- a/hotspot/src/os/aix/vm/globals_aix.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/globals_aix.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,10 @@
//
#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
\
+ /* Use 64K pages for virtual memory (shmat). */ \
+ product(bool, Use64KPages, true, \
+ "Use 64K pages if available.") \
+ \
/* If UseLargePages == true allow or deny usage of 16M pages. 16M pages are */ \
/* a scarce resource and there may be situations where we do not want the VM */ \
/* to run with 16M pages. (Will fall back to 64K pages). */ \
@@ -55,7 +59,7 @@
// Defines Aix-specific default values. The flags are available on all
// platforms, but they may have different default values on other platforms.
//
-define_pd_global(bool, UseLargePages, true);
+define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/aix/vm/interfaceSupport_aix.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/interfaceSupport_aix.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,8 +23,8 @@
*
*/
-#ifndef OS_LINUX_VM_INTERFACESUPPORT_LINUX_HPP
-#define OS_LINUX_VM_INTERFACESUPPORT_LINUX_HPP
+#ifndef OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
+#define OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
// Contains inlined functions for class InterfaceSupport
@@ -32,4 +32,4 @@
os::write_memory_serialize_page(thread);
}
-#endif // OS_LINUX_VM_INTERFACESUPPORT_LINUX_HPP
+#endif // OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
--- a/hotspot/src/os/aix/vm/osThread_aix.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/osThread_aix.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@
_startThread_lock = new Monitor(Mutex::event, "startThread_lock", true,
Monitor::_safepoint_check_never);
- assert(_startThread_lock !=NULL, "check");
+ assert(_startThread_lock != NULL, "check");
}
void OSThread::pd_destroy() {
--- a/hotspot/src/os/aix/vm/os_aix.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/os_aix.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -113,6 +113,10 @@
#define RUSAGE_THREAD (1) /* only the calling thread */
#endif
+// PPC port
+static const uintx Use64KPagesThreshold = 1*M;
+static const uintx MaxExpectedDataSegmentSize = SIZE_4G*2;
+
// Add missing declarations (should be in procinfo.h but isn't until AIX 6.1).
#if !defined(_AIXVERSION_610)
extern "C" {
@@ -168,8 +172,8 @@
return -1; \
}
-// query dimensions of the stack of the calling thread
-static void query_stack_dimensions(address* p_stack_base, size_t* p_stack_size);
+// Query dimensions of the stack of the calling thread.
+static bool query_stack_dimensions(address* p_stack_base, size_t* p_stack_size);
// function to check a given stack pointer against given stack limits
inline bool is_valid_stackpointer(stackptr_t sp, stackptr_t stack_base, size_t stack_size) {
@@ -220,9 +224,6 @@
int os::Aix::_on_pase = -1;
int os::Aix::_os_version = -1;
int os::Aix::_stack_page_size = -1;
-size_t os::Aix::_shm_default_page_size = -1;
-int os::Aix::_can_use_64K_pages = -1;
-int os::Aix::_can_use_16M_pages = -1;
int os::Aix::_xpg_sus_mode = -1;
int os::Aix::_extshm = -1;
int os::Aix::_logical_cpus = -1;
@@ -240,6 +241,63 @@
static sigset_t SR_sigset;
static pthread_mutex_t dl_mutex; // Used to protect dlsym() calls.
+// This describes the state of multipage support of the underlying
+// OS. Note that this is of no interest to the outside world and
+// therefore should not be defined in the AIX class.
+//
+// AIX supports four different page sizes - 4K, 64K, 16MB, 16GB. The
+// latter two (16M "large" and 16G "huge" pages) require special
+// setup and are normally not available.
+//
+// AIX supports multiple page sizes per process, for:
+// - Stack (of the primordial thread, so not relevant for us)
+// - Data - data, bss, heap, for us also pthread stacks
+// - Text - text code
+// - shared memory
+//
+// Default page sizes can be set via linker options (-bdatapsize, -bstacksize, ...)
+// and via environment variable LDR_CNTRL (DATAPSIZE, STACKPSIZE, ...).
+//
+// For shared memory, page size can be set dynamically via
+// shmctl(). Different shared memory regions can have different page
+// sizes.
+//
+// More information can be found at the IBM AIX information center:
+// http://publib.boulder.ibm.com/infocenter/aix/v6r1/index.jsp?topic=/com.ibm.aix.prftungd/doc/prftungd/multiple_page_size_app_support.htm
+//
+static struct {
+ size_t pagesize; // sysconf _SC_PAGESIZE (4K)
+ size_t datapsize; // default data page size (LDR_CNTRL DATAPSIZE)
+ size_t shmpsize; // default shared memory page size (LDR_CNTRL SHMPSIZE)
+ size_t pthr_stack_pagesize; // stack page size of pthread threads
+  size_t textpsize;           // default text page size (LDR_CNTRL TEXTPSIZE)
+ bool can_use_64K_pages; // True if we can alloc 64K pages dynamically with Sys V shm.
+ bool can_use_16M_pages; // True if we can alloc 16M pages dynamically with Sys V shm.
+ int error; // Error describing if something went wrong at multipage init.
+} g_multipage_support = {
+ (size_t) -1,
+ (size_t) -1,
+ (size_t) -1,
+ (size_t) -1,
+ (size_t) -1,
+ false, false,
+ 0
+};
+
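The datapsize/shmpsize/textpsize defaults recorded here are the ones the loader picks from LDR_CNTRL. On AIX that environment variable takes '@'-separated suboptions, so a launch along the lines of LDR_CNTRL=DATAPSIZE=64K@TEXTPSIZE=64K@SHMPSIZE=64K java ... should make all three default to 64K; treat the exact spelling as an illustration and check the AIX documentation linked above.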
+// We must not accidentally allocate memory close to the BRK - even if
+// that would work - because then we prevent the BRK segment from
+// growing which may result in a malloc OOM even though there is
+// enough memory. The problem only arises if we shmat() or mmap() at
+// a specific wish address, e.g. to place the heap in a
+// compressed-oops-friendly way.
+static bool is_close_to_brk(address a) {
+ address a1 = (address) sbrk(0);
+ if (a >= a1 && a < (a1 + MaxExpectedDataSegmentSize)) {
+ return true;
+ }
+ return false;
+}
+
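Since MaxExpectedDataSegmentSize is defined above as SIZE_4G*2, is_close_to_brk() flags any address in the 8G window starting at the current break; the shmat()/mmap() paths are presumably meant to reject such a wish address (or let the OS choose the placement) rather than risk fencing in the data segment.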
julong os::available_memory() {
return Aix::available_memory();
}
@@ -291,7 +349,7 @@
}
// Maximum size 32bit disclaim() accepts. (Theoretically 4GB, but I just do not trust that.)
- const unsigned int maxDisclaimSize = 0x80000000;
+ const unsigned int maxDisclaimSize = 0x40000000;
const unsigned int numFullDisclaimsNeeded = (size / maxDisclaimSize);
const unsigned int lastDisclaimSize = (size % maxDisclaimSize);
@@ -368,138 +426,131 @@
case SIZE_64K: return "64K";
case SIZE_16M: return "16M";
case SIZE_16G: return "16G";
+ case -1: return "not set";
default:
assert(false, "surprise");
return "??";
}
}
-// Retrieve information about multipage size support. Will initialize
-// Aix::_page_size, Aix::_stack_page_size, Aix::_can_use_64K_pages,
-// Aix::_can_use_16M_pages.
+// Probe OS for multipage support.
+// Will fill the global g_multipage_support structure.
// Must be called before calling os::large_page_init().
-void os::Aix::query_multipage_support() {
-
- guarantee(_page_size == -1 &&
- _stack_page_size == -1 &&
- _can_use_64K_pages == -1 &&
- _can_use_16M_pages == -1 &&
- g_multipage_error == -1,
+static void query_multipage_support() {
+
+ guarantee(g_multipage_support.pagesize == -1,
"do not call twice");
- _page_size = ::sysconf(_SC_PAGESIZE);
+ g_multipage_support.pagesize = ::sysconf(_SC_PAGESIZE);
// This really would surprise me.
- assert(_page_size == SIZE_4K, "surprise!");
-
+ assert(g_multipage_support.pagesize == SIZE_4K, "surprise!");
// Query default data page size (default page size for C-Heap, pthread stacks and .bss).
- // Default data page size is influenced either by linker options (-bdatapsize)
+ // Default data page size is defined either by linker options (-bdatapsize)
// or by environment variable LDR_CNTRL (suboption DATAPSIZE). If none is given,
// default should be 4K.
- size_t data_page_size = SIZE_4K;
{
- void* p = os::malloc(SIZE_16M, mtInternal);
- guarantee(p != NULL, "malloc failed");
- data_page_size = os::Aix::query_pagesize(p);
- os::free(p);
- }
-
- // query default shm page size (LDR_CNTRL SHMPSIZE)
+ void* p = ::malloc(SIZE_16M);
+ g_multipage_support.datapsize = os::Aix::query_pagesize(p);
+ ::free(p);
+ }
+
+ // Query default shm page size (LDR_CNTRL SHMPSIZE).
{
const int shmid = ::shmget(IPC_PRIVATE, 1, IPC_CREAT | S_IRUSR | S_IWUSR);
guarantee(shmid != -1, "shmget failed");
void* p = ::shmat(shmid, NULL, 0);
::shmctl(shmid, IPC_RMID, NULL);
guarantee(p != (void*) -1, "shmat failed");
- _shm_default_page_size = os::Aix::query_pagesize(p);
+ g_multipage_support.shmpsize = os::Aix::query_pagesize(p);
::shmdt(p);
}
- // before querying the stack page size, make sure we are not running as primordial
+ // Before querying the stack page size, make sure we are not running as primordial
// thread (because primordial thread's stack may have different page size than
// pthread thread stacks). Running a VM on the primordial thread won't work for a
- // number of reasons so we may just as well guarantee it here
- guarantee(!os::Aix::is_primordial_thread(), "Must not be called for primordial thread");
-
- // query stack page size
+ // number of reasons so we may just as well guarantee it here.
+ guarantee0(!os::Aix::is_primordial_thread());
+
+ // Query pthread stack page size.
{
int dummy = 0;
- _stack_page_size = os::Aix::query_pagesize(&dummy);
- // everything else would surprise me and should be looked into
- guarantee(_stack_page_size == SIZE_4K || _stack_page_size == SIZE_64K, "Wrong page size");
- // also, just for completeness: pthread stacks are allocated from C heap, so
- // stack page size should be the same as data page size
- guarantee(_stack_page_size == data_page_size, "stack page size should be the same as data page size");
- }
-
- // EXTSHM is bad: among other things, it prevents setting pagesize dynamically
- // for system V shm.
- if (Aix::extshm()) {
- if (Verbose) {
- fprintf(stderr, "EXTSHM is active - will disable large page support.\n"
- "Please make sure EXTSHM is OFF for large page support.\n");
- }
- g_multipage_error = ERROR_MP_EXTSHM_ACTIVE;
- _can_use_64K_pages = _can_use_16M_pages = 0;
+ g_multipage_support.pthr_stack_pagesize = os::Aix::query_pagesize(&dummy);
+ }
+
+ // Query default text page size (LDR_CNTRL TEXTPSIZE).
+ /* PPC port: so far unused.
+ {
+ address any_function =
+ (address) resolve_function_descriptor_to_code_pointer((address)describe_pagesize);
+ g_multipage_support.textpsize = os::Aix::query_pagesize(any_function);
+ }
+ */
+
+ // Now probe for support of 64K pages and 16M pages.
+
+ // Before OS/400 V6R1, there is no support for pages other than 4K.
+ if (os::Aix::on_pase_V5R4_or_older()) {
+ Unimplemented();
goto query_multipage_support_end;
}
- // now check which page sizes the OS claims it supports, and of those, which actually can be used.
+ // Now check which page sizes the OS claims it supports, and of those, which actually can be used.
{
const int MAX_PAGE_SIZES = 4;
psize_t sizes[MAX_PAGE_SIZES];
const int num_psizes = ::vmgetinfo(sizes, VMINFO_GETPSIZES, MAX_PAGE_SIZES);
if (num_psizes == -1) {
- if (Verbose) {
- fprintf(stderr, "vmgetinfo(VMINFO_GETPSIZES) failed (errno: %d)\n", errno);
- fprintf(stderr, "disabling multipage support.\n");
- }
- g_multipage_error = ERROR_MP_VMGETINFO_FAILED;
- _can_use_64K_pages = _can_use_16M_pages = 0;
+ trc("vmgetinfo(VMINFO_GETPSIZES) failed (errno: %d)\n", errno);
+ trc("disabling multipage support.\n");
+ g_multipage_support.error = ERROR_MP_VMGETINFO_FAILED;
goto query_multipage_support_end;
}
guarantee(num_psizes > 0, "vmgetinfo(.., VMINFO_GETPSIZES, ...) failed.");
assert(num_psizes <= MAX_PAGE_SIZES, "Surprise! more than 4 page sizes?");
- if (Verbose) {
- fprintf(stderr, "vmgetinfo(.., VMINFO_GETPSIZES, ...) returns %d supported page sizes: ", num_psizes);
- for (int i = 0; i < num_psizes; i ++) {
- fprintf(stderr, " %s ", describe_pagesize(sizes[i]));
- }
- fprintf(stderr, " .\n");
+ trcVerbose("vmgetinfo(.., VMINFO_GETPSIZES, ...) returns %d supported page sizes: ", num_psizes);
+ for (int i = 0; i < num_psizes; i ++) {
+ trcVerbose(" %s ", describe_pagesize(sizes[i]));
}
// Can we use 64K, 16M pages?
- _can_use_64K_pages = 0;
- _can_use_16M_pages = 0;
for (int i = 0; i < num_psizes; i ++) {
- if (sizes[i] == SIZE_64K) {
- _can_use_64K_pages = 1;
- } else if (sizes[i] == SIZE_16M) {
- _can_use_16M_pages = 1;
+ const size_t pagesize = sizes[i];
+ if (pagesize != SIZE_64K && pagesize != SIZE_16M) {
+ continue;
}
- }
-
- if (!_can_use_64K_pages) {
- g_multipage_error = ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K;
- }
-
- // Double-check for 16M pages: Even if AIX claims to be able to use 16M pages,
- // there must be an actual 16M page pool, and we must run with enough rights.
- if (_can_use_16M_pages) {
- const int shmid = ::shmget(IPC_PRIVATE, SIZE_16M, IPC_CREAT | S_IRUSR | S_IWUSR);
- guarantee(shmid != -1, "shmget failed");
+ bool can_use = false;
+ trcVerbose("Probing support for %s pages...", describe_pagesize(pagesize));
+ const int shmid = ::shmget(IPC_PRIVATE, pagesize,
+ IPC_CREAT | S_IRUSR | S_IWUSR);
+ guarantee0(shmid != -1); // Should always work.
+ // Try to set pagesize.
struct shmid_ds shm_buf = { 0 };
- shm_buf.shm_pagesize = SIZE_16M;
- const bool can_set_pagesize = ::shmctl(shmid, SHM_PAGESIZE, &shm_buf) == 0 ? true : false;
- const int en = errno;
- ::shmctl(shmid, IPC_RMID, NULL);
- if (!can_set_pagesize) {
- if (Verbose) {
- fprintf(stderr, "Failed to allocate even one misely 16M page. shmctl failed with %d (%s).\n"
- "Will deactivate 16M support.\n", en, strerror(en));
+ shm_buf.shm_pagesize = pagesize;
+ if (::shmctl(shmid, SHM_PAGESIZE, &shm_buf) != 0) {
+ const int en = errno;
+ ::shmctl(shmid, IPC_RMID, NULL); // As early as possible!
+ // PPC port trcVerbose("shmctl(SHM_PAGESIZE) failed with %s",
+ // PPC port MiscUtils::describe_errno(en));
+ } else {
+ // Attach and double-check pagesize.
+ void* p = ::shmat(shmid, NULL, 0);
+ ::shmctl(shmid, IPC_RMID, NULL); // As early as possible!
+ guarantee0(p != (void*) -1); // Should always work.
+ const size_t real_pagesize = os::Aix::query_pagesize(p);
+ if (real_pagesize != pagesize) {
+ trcVerbose("real page size (0x%llX) differs.", real_pagesize);
+ } else {
+ can_use = true;
}
- _can_use_16M_pages = 0;
+ ::shmdt(p);
+ }
+ trcVerbose("Can use: %s", (can_use ? "yes" : "no"));
+ if (pagesize == SIZE_64K) {
+ g_multipage_support.can_use_64K_pages = can_use;
+ } else if (pagesize == SIZE_16M) {
+ g_multipage_support.can_use_16M_pages = can_use;
}
}
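The probe loop above amounts to a small System V shared memory experiment per page size: create a throwaway segment, request the page size with shmctl(SHM_PAGESIZE), attach, and compare against what the kernel actually backed the segment with. A minimal standalone sketch of that experiment (hedged: plain POSIX/AIX calls only, the query_pagesize callback stands in for os::Aix::query_pagesize, error handling reduced to the bare minimum; the production code above additionally removes the segment as early as possible, right after shmat):

    #include <sys/ipc.h>
    #include <sys/shm.h>
    #include <sys/stat.h>
    #include <string.h>
    #include <stddef.h>

    // Returns true if a throwaway shm segment can really be backed by pages of
    // 'pagesize' bytes. 'query_pagesize' must return the page size in effect at
    // a given address (e.g. via vmgetinfo on AIX).
    static bool probe_pagesize(size_t pagesize, size_t (*query_pagesize)(void*)) {
      const int shmid = shmget(IPC_PRIVATE, pagesize, IPC_CREAT | S_IRUSR | S_IWUSR);
      if (shmid == -1) return false;
      struct shmid_ds shm_buf;
      memset(&shm_buf, 0, sizeof(shm_buf));
      shm_buf.shm_pagesize = pagesize;            // request the page size ...
      bool ok = false;
      if (shmctl(shmid, SHM_PAGESIZE, &shm_buf) == 0) {
        void* p = shmat(shmid, NULL, 0);          // ... attach ...
        if (p != (void*)-1) {
          ok = (query_pagesize(p) == pagesize);   // ... verify what we really got
          shmdt(p);
        }
      }
      shmctl(shmid, IPC_RMID, NULL);              // never leave the segment behind
      return ok;
    }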
@@ -507,23 +558,29 @@
query_multipage_support_end:
- guarantee(_page_size != -1 &&
- _stack_page_size != -1 &&
- _can_use_64K_pages != -1 &&
- _can_use_16M_pages != -1, "Page sizes not properly initialized");
-
- if (_can_use_64K_pages) {
- g_multipage_error = 0;
- }
-
- if (Verbose) {
- fprintf(stderr, "Data page size (C-Heap, bss, etc): %s\n", describe_pagesize(data_page_size));
- fprintf(stderr, "Thread stack page size (pthread): %s\n", describe_pagesize(_stack_page_size));
- fprintf(stderr, "Default shared memory page size: %s\n", describe_pagesize(_shm_default_page_size));
- fprintf(stderr, "Can use 64K pages dynamically with shared meory: %s\n", (_can_use_64K_pages ? "yes" :"no"));
- fprintf(stderr, "Can use 16M pages dynamically with shared memory: %s\n", (_can_use_16M_pages ? "yes" :"no"));
- fprintf(stderr, "Multipage error details: %d\n", g_multipage_error);
- }
+ trcVerbose("base page size (sysconf _SC_PAGESIZE): %s\n",
+ describe_pagesize(g_multipage_support.pagesize));
+ trcVerbose("Data page size (C-Heap, bss, etc): %s\n",
+ describe_pagesize(g_multipage_support.datapsize));
+ trcVerbose("Text page size: %s\n",
+ describe_pagesize(g_multipage_support.textpsize));
+ trcVerbose("Thread stack page size (pthread): %s\n",
+ describe_pagesize(g_multipage_support.pthr_stack_pagesize));
+ trcVerbose("Default shared memory page size: %s\n",
+ describe_pagesize(g_multipage_support.shmpsize));
+ trcVerbose("Can use 64K pages dynamically with shared meory: %s\n",
+ (g_multipage_support.can_use_64K_pages ? "yes" :"no"));
+ trcVerbose("Can use 16M pages dynamically with shared memory: %s\n",
+ (g_multipage_support.can_use_16M_pages ? "yes" :"no"));
+ trcVerbose("Multipage error details: %d\n",
+ g_multipage_support.error);
+
+ // sanity checks
+ assert0(g_multipage_support.pagesize == SIZE_4K);
+ assert0(g_multipage_support.datapsize == SIZE_4K || g_multipage_support.datapsize == SIZE_64K);
+ // PPC port: so far unused. assert0(g_multipage_support.textpsize == SIZE_4K || g_multipage_support.textpsize == SIZE_64K);
+ assert0(g_multipage_support.pthr_stack_pagesize == g_multipage_support.datapsize);
+ assert0(g_multipage_support.shmpsize == SIZE_4K || g_multipage_support.shmpsize == SIZE_64K);
} // end os::Aix::query_multipage_support()
@@ -1572,9 +1629,12 @@
st->print_cr(" default page size: %s", describe_pagesize(os::vm_page_size()));
st->print_cr(" default stack page size: %s", describe_pagesize(os::vm_page_size()));
- st->print_cr(" default shm page size: %s", describe_pagesize(os::Aix::shm_default_page_size()));
- st->print_cr(" can use 64K pages dynamically: %s", (os::Aix::can_use_64K_pages() ? "yes" :"no"));
- st->print_cr(" can use 16M pages dynamically: %s", (os::Aix::can_use_16M_pages() ? "yes" :"no"));
+ st->print_cr(" Default shared memory page size: %s",
+ describe_pagesize(g_multipage_support.shmpsize));
+ st->print_cr(" Can use 64K pages dynamically with shared meory: %s",
+ (g_multipage_support.can_use_64K_pages ? "yes" :"no"));
+ st->print_cr(" Can use 16M pages dynamically with shared memory: %s",
+ (g_multipage_support.can_use_16M_pages ? "yes" :"no"));
if (g_multipage_error != 0) {
st->print_cr(" multipage error: %d", g_multipage_error);
}
@@ -1585,6 +1645,9 @@
const char* const extshm = ::getenv("EXTSHM");
st->print_cr(" EXTSHM=%s.", extshm ? extshm : "<unset>");
+ if (extshm != NULL && ((strcmp(extshm, "on") == 0) || (strcmp(extshm, "ON") == 0))) {
+ st->print_cr(" *** Unsupported! Please remove EXTSHM from your environment! ***");
+ }
// Call os::Aix::get_meminfo() to retrieve memory statistics.
os::Aix::meminfo_t mi;
@@ -1827,315 +1890,386 @@
////////////////////////////////////////////////////////////////////////////////
// Virtual Memory
-// AddrRange describes an immutable address range
-//
-// This is a helper class for the 'shared memory bookkeeping' below.
-class AddrRange {
- friend class ShmBkBlock;
-
- char* _start;
- size_t _size;
-
-public:
-
- AddrRange(char* start, size_t size)
- : _start(start), _size(size)
- {}
-
- AddrRange(const AddrRange& r)
- : _start(r.start()), _size(r.size())
- {}
-
- char* start() const { return _start; }
- size_t size() const { return _size; }
- char* end() const { return _start + _size; }
- bool is_empty() const { return _size == 0 ? true : false; }
-
- static AddrRange empty_range() { return AddrRange(NULL, 0); }
-
- bool contains(const char* p) const {
- return start() <= p && end() > p;
- }
-
- bool contains(const AddrRange& range) const {
- return start() <= range.start() && end() >= range.end();
- }
-
- bool intersects(const AddrRange& range) const {
- return (range.start() <= start() && range.end() > start()) ||
- (range.start() < end() && range.end() >= end()) ||
- contains(range);
- }
-
- bool is_same_range(const AddrRange& range) const {
- return start() == range.start() && size() == range.size();
- }
-
- // return the closest inside range consisting of whole pages
- AddrRange find_closest_aligned_range(size_t pagesize) const {
- if (pagesize == 0 || is_empty()) {
- return empty_range();
+// We need to keep small simple bookkeeping for os::reserve_memory and friends.
+
+#define VMEM_MAPPED 1
+#define VMEM_SHMATED 2
+
+struct vmembk_t {
+ int type; // 1 - mmap, 2 - shmat
+ char* addr;
+ size_t size; // Real size, may be larger than usersize.
+ size_t pagesize; // page size of area
+ vmembk_t* next;
+
+ bool contains_addr(char* p) const {
+ return p >= addr && p < (addr + size);
+ }
+
+ bool contains_range(char* p, size_t s) const {
+ return contains_addr(p) && contains_addr(p + s - 1);
+ }
+
+ void print_on(outputStream* os) const {
+ os->print("[" PTR_FORMAT " - " PTR_FORMAT "] (" UINTX_FORMAT
+ " bytes, %d %s pages), %s",
+ addr, addr + size - 1, size, size / pagesize, describe_pagesize(pagesize),
+ (type == VMEM_SHMATED ? "shmat" : "mmap")
+ );
+ }
+
+ // Check that range is a sub range of the memory block (or equal to it);
+ // also check that the range is fully page aligned to the page size of the block.
+ void assert_is_valid_subrange(char* p, size_t s) const {
+ if (!contains_range(p, s)) {
+ fprintf(stderr, "[" PTR_FORMAT " - " PTR_FORMAT "] is not a sub "
+ "range of [" PTR_FORMAT " - " PTR_FORMAT "].\n",
+ p, p + s - 1, addr, addr + size - 1);
+ guarantee0(false);
}
- char* const from = (char*)align_size_up((intptr_t)_start, pagesize);
- char* const to = (char*)align_size_down((intptr_t)end(), pagesize);
- if (from > to) {
- return empty_range();
+ if (!is_aligned_to(p, pagesize) || !is_aligned_to(p + s, pagesize)) {
+ fprintf(stderr, "range [" PTR_FORMAT " - " PTR_FORMAT "] is not"
+ " aligned to pagesize (%s)\n", p, p + s);
+ guarantee0(false);
}
- return AddrRange(from, to - from);
}
};
-////////////////////////////////////////////////////////////////////////////
-// shared memory bookkeeping
-//
-// the os::reserve_memory() API and friends hand out different kind of memory, depending
-// on need and circumstances. Memory may be allocated with mmap() or with shmget/shmat.
-//
-// But these memory types have to be treated differently. For example, to uncommit
-// mmap-based memory, msync(MS_INVALIDATE) is needed, to uncommit shmat-based memory,
-// disclaim64() is needed.
-//
-// Therefore we need to keep track of the allocated memory segments and their
-// properties.
-
-// ShmBkBlock: base class for all blocks in the shared memory bookkeeping
-class ShmBkBlock : public CHeapObj<mtInternal> {
-
- ShmBkBlock* _next;
-
-protected:
-
- AddrRange _range;
- const size_t _pagesize;
- const bool _pinned;
-
-public:
-
- ShmBkBlock(AddrRange range, size_t pagesize, bool pinned)
- : _range(range), _pagesize(pagesize), _pinned(pinned) , _next(NULL) {
-
- assert(_pagesize == SIZE_4K || _pagesize == SIZE_64K || _pagesize == SIZE_16M, "invalid page size");
- assert(!_range.is_empty(), "invalid range");
- }
-
- virtual void print(outputStream* st) const {
- st->print("0x%p ... 0x%p (%llu) - %d %s pages - %s",
- _range.start(), _range.end(), _range.size(),
- _range.size() / _pagesize, describe_pagesize(_pagesize),
- _pinned ? "pinned" : "");
- }
-
- enum Type { MMAP, SHMAT };
- virtual Type getType() = 0;
-
- char* base() const { return _range.start(); }
- size_t size() const { return _range.size(); }
-
- void setAddrRange(AddrRange range) {
- _range = range;
- }
-
- bool containsAddress(const char* p) const {
- return _range.contains(p);
- }
-
- bool containsRange(const char* p, size_t size) const {
- return _range.contains(AddrRange((char*)p, size));
- }
-
- bool isSameRange(const char* p, size_t size) const {
- return _range.is_same_range(AddrRange((char*)p, size));
- }
-
- virtual bool disclaim(char* p, size_t size) = 0;
- virtual bool release() = 0;
-
- // blocks live in a list.
- ShmBkBlock* next() const { return _next; }
- void set_next(ShmBkBlock* blk) { _next = blk; }
-
-}; // end: ShmBkBlock
-
-
-// ShmBkMappedBlock: describes an block allocated with mmap()
-class ShmBkMappedBlock : public ShmBkBlock {
-public:
-
- ShmBkMappedBlock(AddrRange range)
- : ShmBkBlock(range, SIZE_4K, false) {} // mmap: always 4K, never pinned
-
- void print(outputStream* st) const {
- ShmBkBlock::print(st);
- st->print_cr(" - mmap'ed");
- }
-
- Type getType() {
- return MMAP;
- }
-
- bool disclaim(char* p, size_t size) {
-
- AddrRange r(p, size);
-
- guarantee(_range.contains(r), "invalid disclaim");
-
- // only disclaim whole ranges.
- const AddrRange r2 = r.find_closest_aligned_range(_pagesize);
- if (r2.is_empty()) {
- return true;
+static struct {
+ vmembk_t* first;
+ MiscUtils::CritSect cs;
+} vmem;
+
+static void vmembk_add(char* addr, size_t size, size_t pagesize, int type) {
+ vmembk_t* p = (vmembk_t*) ::malloc(sizeof(vmembk_t));
+ assert0(p);
+ if (p) {
+ MiscUtils::AutoCritSect lck(&vmem.cs);
+ p->addr = addr; p->size = size;
+ p->pagesize = pagesize;
+ p->type = type;
+ p->next = vmem.first;
+ vmem.first = p;
+ }
+}
+
+static vmembk_t* vmembk_find(char* addr) {
+ MiscUtils::AutoCritSect lck(&vmem.cs);
+ for (vmembk_t* p = vmem.first; p; p = p->next) {
+ if (p->addr <= addr && (p->addr + p->size) > addr) {
+ return p;
}
-
- const int rc = ::msync(r2.start(), r2.size(), MS_INVALIDATE);
-
- if (rc != 0) {
- warning("msync(0x%p, %llu, MS_INVALIDATE) failed (%d)\n", r2.start(), r2.size(), errno);
- }
-
- return rc == 0 ? true : false;
- }
-
- bool release() {
- // mmap'ed blocks are released using munmap
- if (::munmap(_range.start(), _range.size()) != 0) {
- warning("munmap(0x%p, %llu) failed (%d)\n", _range.start(), _range.size(), errno);
- return false;
- }
- return true;
- }
-}; // end: ShmBkMappedBlock
-
-// ShmBkShmatedBlock: describes an block allocated with shmget/shmat()
-class ShmBkShmatedBlock : public ShmBkBlock {
-public:
-
- ShmBkShmatedBlock(AddrRange range, size_t pagesize, bool pinned)
- : ShmBkBlock(range, pagesize, pinned) {}
-
- void print(outputStream* st) const {
- ShmBkBlock::print(st);
- st->print_cr(" - shmat'ed");
- }
-
- Type getType() {
- return SHMAT;
- }
-
- bool disclaim(char* p, size_t size) {
-
- AddrRange r(p, size);
-
- if (_pinned) {
- return true;
- }
-
- // shmat'ed blocks are disclaimed using disclaim64
- guarantee(_range.contains(r), "invalid disclaim");
-
- // only disclaim whole ranges.
- const AddrRange r2 = r.find_closest_aligned_range(_pagesize);
- if (r2.is_empty()) {
- return true;
- }
-
- const bool rc = my_disclaim64(r2.start(), r2.size());
-
- if (Verbose && !rc) {
- warning("failed to disclaim shm %p-%p\n", r2.start(), r2.end());
- }
-
- return rc;
- }
-
- bool release() {
- bool rc = false;
- if (::shmdt(_range.start()) != 0) {
- warning("shmdt(0x%p) failed (%d)\n", _range.start(), errno);
- } else {
- rc = true;
- }
- return rc;
- }
-
-}; // end: ShmBkShmatedBlock
-
-static ShmBkBlock* g_shmbk_list = NULL;
-static volatile jint g_shmbk_table_lock = 0;
-
-// keep some usage statistics
-static struct {
- int nodes; // number of nodes in list
- size_t bytes; // reserved - not committed - bytes.
- int reserves; // how often reserve was called
- int lookups; // how often a lookup was made
-} g_shmbk_stats = { 0, 0, 0, 0 };
-
-// add information about a shared memory segment to the bookkeeping
-static void shmbk_register(ShmBkBlock* p_block) {
- guarantee(p_block, "logic error");
- p_block->set_next(g_shmbk_list);
- g_shmbk_list = p_block;
- g_shmbk_stats.reserves ++;
- g_shmbk_stats.bytes += p_block->size();
- g_shmbk_stats.nodes ++;
-}
-
-// remove information about a shared memory segment by its starting address
-static void shmbk_unregister(ShmBkBlock* p_block) {
- ShmBkBlock* p = g_shmbk_list;
- ShmBkBlock* prev = NULL;
- while (p) {
- if (p == p_block) {
- if (prev) {
- prev->set_next(p->next());
- } else {
- g_shmbk_list = p->next();
- }
- g_shmbk_stats.nodes --;
- g_shmbk_stats.bytes -= p->size();
+ }
+ return NULL;
+}
+
+static void vmembk_remove(vmembk_t* p0) {
+ MiscUtils::AutoCritSect lck(&vmem.cs);
+ assert0(p0);
+ assert0(vmem.first); // List should not be empty.
+ for (vmembk_t** pp = &(vmem.first); *pp; pp = &((*pp)->next)) {
+ if (*pp == p0) {
+ *pp = p0->next;
+ ::free(p0);
return;
}
- prev = p;
- p = p->next();
- }
- assert(false, "should not happen");
-}
-
-// given a pointer, return shared memory bookkeeping record for the segment it points into
-// using the returned block info must happen under lock protection
-static ShmBkBlock* shmbk_find_by_containing_address(const char* addr) {
- g_shmbk_stats.lookups ++;
- ShmBkBlock* p = g_shmbk_list;
- while (p) {
- if (p->containsAddress(addr)) {
- return p;
+ }
+ assert0(false); // Not found?
+}
+
+static void vmembk_print_on(outputStream* os) {
+ MiscUtils::AutoCritSect lck(&vmem.cs);
+ for (vmembk_t* vmi = vmem.first; vmi; vmi = vmi->next) {
+ vmi->print_on(os);
+ os->cr();
+ }
+}
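vmembk_remove() above uses the pointer-to-pointer walk so that unlinking the list head and unlinking an interior node go through the same code path. The idiom in isolation (a sketch with a generic node type, no locking, hypothetical names, malloc'ed nodes assumed):

    #include <stdlib.h>
    #include <assert.h>

    struct node_t { int value; node_t* next; };

    // Unlink and free 'victim' from the singly linked list rooted at '*head'.
    // Because we iterate over node_t** (the head pointer first, then each
    // 'next' field), no special case for the first element is needed.
    static void list_remove(node_t** head, node_t* victim) {
      for (node_t** pp = head; *pp != NULL; pp = &((*pp)->next)) {
        if (*pp == victim) {
          *pp = victim->next;
          free(victim);
          return;
        }
      }
      assert(false && "victim not found in list");
    }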
+
+// Reserve and attach a section of System V memory.
+// If <requested_addr> is not NULL, function will attempt to attach the memory at the given
+// address. Failing that, it will attach the memory anywhere.
+// If <requested_addr> is NULL, function will attach the memory anywhere.
+//
+// <alignment_hint> is ignored by this function. It is very probable, however, that the
+// alignment requirements are met anyway, because shmat() attaches at 256M boundaries.
+// Should this not be enough, we can put more work into it.
+static char* reserve_shmated_memory (
+ size_t bytes,
+ char* requested_addr,
+ size_t alignment_hint) {
+
+ trcVerbose("reserve_shmated_memory " UINTX_FORMAT " bytes, wishaddress "
+ PTR_FORMAT ", alignment_hint " UINTX_FORMAT "...",
+ bytes, requested_addr, alignment_hint);
+
+ // Either give me wish address or wish alignment but not both.
+ assert0(!(requested_addr != NULL && alignment_hint != 0));
+
+ // We must prevent anyone from attaching too close to the
+ // BRK because that may cause malloc OOM.
+ if (requested_addr != NULL && is_close_to_brk((address)requested_addr)) {
+ trcVerbose("Wish address " PTR_FORMAT " is too close to the BRK segment. "
+ "Will attach anywhere.", requested_addr);
+ // Act like the OS refused to attach there.
+ requested_addr = NULL;
+ }
+
+ // For old AS/400's (V5R4 and older) we should not even be here - System V shared memory is not
+ // really supported (max size 4GB), so reserve_mmapped_memory should have been used instead.
+ if (os::Aix::on_pase_V5R4_or_older()) {
+ ShouldNotReachHere();
+ }
+
+ // Align size of shm up to 64K to avoid errors if we later try to change the page size.
+ const size_t size = align_size_up(bytes, SIZE_64K);
+
+ // Reserve the shared segment.
+ int shmid = shmget(IPC_PRIVATE, size, IPC_CREAT | S_IRUSR | S_IWUSR);
+ if (shmid == -1) {
+ trc("shmget(.., " UINTX_FORMAT ", ..) failed (errno: %d).", size, errno);
+ return NULL;
+ }
+
+ // Important note:
+ // It is very important that we, upon leaving this function, do not leave a shm segment alive.
+ // We must right after attaching it remove it from the system. System V shm segments are global and
+ // survive the process.
+ // So, from here on: Do not assert, do not return, until we have called shmctl(IPC_RMID) (A).
+
+ struct shmid_ds shmbuf;
+ memset(&shmbuf, 0, sizeof(shmbuf));
+ shmbuf.shm_pagesize = SIZE_64K;
+ if (shmctl(shmid, SHM_PAGESIZE, &shmbuf) != 0) {
+ trcVerbose("Failed to set page size (need " UINTX_FORMAT " 64K pages) - shmctl failed with %d.",
+ size / SIZE_64K, errno);
+ // I want to know if this ever happens.
+ assert(false, "failed to set page size for shmat");
+ }
+
+ // Now attach the shared segment.
+ // Note that I attach with SHM_RND - which means that the requested address is rounded down, if
+ // needed, to the next lowest segment boundary. Otherwise the attach would fail if the address
+ // were not a segment boundary.
+ char* const addr = (char*) shmat(shmid, requested_addr, SHM_RND);
+ const int errno_shmat = errno;
+
+ // (A) Right after shmat and before handing shmat errors delete the shm segment.
+ if (::shmctl(shmid, IPC_RMID, NULL) == -1) {
+ trc("shmctl(%u, IPC_RMID) failed (%d)\n", shmid, errno);
+ assert(false, "failed to remove shared memory segment!");
+ }
+
+ // Handle shmat error. If we failed to attach, just return.
+ if (addr == (char*)-1) {
+ trcVerbose("Failed to attach segment at " PTR_FORMAT " (%d).", requested_addr, errno_shmat);
+ return NULL;
+ }
+
+ // Just for info: query the real page size. In case setting the page size did not
+ // work (see above), the system may have given us something other than 4K (LDR_CNTRL).
+ const size_t real_pagesize = os::Aix::query_pagesize(addr);
+ if (real_pagesize != shmbuf.shm_pagesize) {
+ trcVerbose("pagesize is, surprisingly, %h.", real_pagesize);
+ }
+
+ if (addr) {
+ trcVerbose("shm-allocated " PTR_FORMAT " .. " PTR_FORMAT " (" UINTX_FORMAT " bytes, " UINTX_FORMAT " %s pages)",
+ addr, addr + size - 1, size, size/real_pagesize, describe_pagesize(real_pagesize));
+ } else {
+ if (requested_addr != NULL) {
+ trcVerbose("failed to shm-allocate " UINTX_FORMAT " bytes at with address " PTR_FORMAT ".", size, requested_addr);
+ } else {
+ trcVerbose("failed to shm-allocate " UINTX_FORMAT " bytes at any address.", size);
}
- p = p->next();
- }
- return NULL;
-}
-
-// dump all information about all memory segments allocated with os::reserve_memory()
-void shmbk_dump_info() {
- tty->print_cr("-- shared mem bookkeeping (alive: %d segments, %llu bytes, "
- "total reserves: %d total lookups: %d)",
- g_shmbk_stats.nodes, g_shmbk_stats.bytes, g_shmbk_stats.reserves, g_shmbk_stats.lookups);
- const ShmBkBlock* p = g_shmbk_list;
- int i = 0;
- while (p) {
- p->print(tty);
- p = p->next();
- i ++;
- }
-}
-
-#define LOCK_SHMBK { ThreadCritical _LOCK_SHMBK;
-#define UNLOCK_SHMBK }
+ }
+
+ // book-keeping
+ vmembk_add(addr, size, real_pagesize, VMEM_SHMATED);
+ assert0(is_aligned_to(addr, os::vm_page_size()));
+
+ return addr;
+}
+
+static bool release_shmated_memory(char* addr, size_t size) {
+
+ trcVerbose("release_shmated_memory [" PTR_FORMAT " - " PTR_FORMAT "].",
+ addr, addr + size - 1);
+
+ bool rc = false;
+
+ // TODO: is there a way to verify shm size without doing bookkeeping?
+ if (::shmdt(addr) != 0) {
+ trcVerbose("error (%d).", errno);
+ } else {
+ trcVerbose("ok.");
+ rc = true;
+ }
+ return rc;
+}
+
+static bool uncommit_shmated_memory(char* addr, size_t size) {
+ trcVerbose("uncommit_shmated_memory [" PTR_FORMAT " - " PTR_FORMAT "].",
+ addr, addr + size - 1);
+
+ const bool rc = my_disclaim64(addr, size);
+
+ if (!rc) {
+ trcVerbose("my_disclaim64(" PTR_FORMAT ", " UINTX_FORMAT ") failed.\n", addr, size);
+ return false;
+ }
+ return true;
+}
+
+// Reserve memory via mmap.
+// If <requested_addr> is given, an attempt is made to attach at the given address.
+// Failing that, memory is allocated at any address.
+// If <alignment_hint> is given and <requested_addr> is NULL, an attempt is made to
+// allocate at an address aligned with the given alignment. Failing that, memory
+// is aligned anywhere.
+static char* reserve_mmaped_memory(size_t bytes, char* requested_addr, size_t alignment_hint) {
+ trcVerbose("reserve_mmaped_memory " UINTX_FORMAT " bytes, wishaddress " PTR_FORMAT ", "
+ "alignment_hint " UINTX_FORMAT "...",
+ bytes, requested_addr, alignment_hint);
+
+ // If a wish address is given, but not aligned to 4K page boundary, mmap will fail.
+ if (requested_addr && !is_aligned_to(requested_addr, os::vm_page_size())) {
+ trcVerbose("Wish address " PTR_FORMAT " not aligned to page boundary.", requested_addr);
+ return NULL;
+ }
+
+ // We must prevent anyone from attaching too close to the
+ // BRK because that may cause malloc OOM.
+ if (requested_addr != NULL && is_close_to_brk((address)requested_addr)) {
+ trcVerbose("Wish address " PTR_FORMAT " is too close to the BRK segment. "
+ "Will attach anywhere.", requested_addr);
+ // Act like the OS refused to attach there.
+ requested_addr = NULL;
+ }
+
+ // Specify one or the other but not both.
+ assert0(!(requested_addr != NULL && alignment_hint > 0));
+
+ // In 64K mode, we claim the global page size (os::vm_page_size())
+ // is 64K. This is one of the few points where that illusion may
+ // break, because mmap() will always return memory aligned to 4K. So
+ // we must ensure we only ever return memory aligned to 64k.
+ if (alignment_hint) {
+ alignment_hint = lcm(alignment_hint, os::vm_page_size());
+ } else {
+ alignment_hint = os::vm_page_size();
+ }
+
+ // Size shall always be a multiple of os::vm_page_size (esp. in 64K mode).
+ const size_t size = align_size_up(bytes, os::vm_page_size());
+
+ // alignment: Allocate memory large enough to include an aligned range of the right size and
+ // cut off the leading and trailing waste pages.
+ assert0(alignment_hint != 0 && is_aligned_to(alignment_hint, os::vm_page_size())); // see above
+ const size_t extra_size = size + alignment_hint;
+
+ // Note: MAP_SHARED (instead of MAP_PRIVATE) needed to be able to
+ // later use msync(MS_INVALIDATE) (see os::uncommit_memory).
+ int flags = MAP_ANONYMOUS | MAP_SHARED;
+
+ // MAP_FIXED is needed to enforce requested_addr - manpage is vague about what
+ // it means if wishaddress is given but MAP_FIXED is not set.
+ //
+ // Important! Behaviour differs depending on whether SPEC1170 mode is active or not.
+ // SPEC1170 mode active: behaviour like POSIX, MAP_FIXED will clobber existing mappings.
+ // SPEC1170 mode not active: behaviour, unlike POSIX, is that no existing mappings will
+ // get clobbered.
+ if (requested_addr != NULL) {
+ if (!os::Aix::xpg_sus_mode()) { // not SPEC1170 Behaviour
+ flags |= MAP_FIXED;
+ }
+ }
+
+ char* addr = (char*)::mmap(requested_addr, extra_size,
+ PROT_READ|PROT_WRITE|PROT_EXEC, flags, -1, 0);
+
+ if (addr == MAP_FAILED) {
+ trcVerbose("mmap(" PTR_FORMAT ", " UINTX_FORMAT ", ..) failed (%d)", requested_addr, size, errno);
+ return NULL;
+ }
+
+ // Handle alignment.
+ char* const addr_aligned = (char *)align_ptr_up(addr, alignment_hint);
+ const size_t waste_pre = addr_aligned - addr;
+ char* const addr_aligned_end = addr_aligned + size;
+ const size_t waste_post = extra_size - waste_pre - size;
+ if (waste_pre > 0) {
+ ::munmap(addr, waste_pre);
+ }
+ if (waste_post > 0) {
+ ::munmap(addr_aligned_end, waste_post);
+ }
+ addr = addr_aligned;
+
+ if (addr) {
+ trcVerbose("mmap-allocated " PTR_FORMAT " .. " PTR_FORMAT " (" UINTX_FORMAT " bytes)",
+ addr, addr + bytes, bytes);
+ } else {
+ if (requested_addr != NULL) {
+ trcVerbose("failed to mmap-allocate " UINTX_FORMAT " bytes at wish address " PTR_FORMAT ".", bytes, requested_addr);
+ } else {
+ trcVerbose("failed to mmap-allocate " UINTX_FORMAT " bytes at any address.", bytes);
+ }
+ }
+
+ // bookkeeping
+ vmembk_add(addr, size, SIZE_4K, VMEM_MAPPED);
+
+ // Test alignment, see above.
+ assert0(is_aligned_to(addr, os::vm_page_size()));
+
+ return addr;
+}
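The alignment handling at the end of reserve_mmaped_memory() is the usual over-allocate-and-trim trick: map alignment_hint extra bytes, then munmap the misaligned head and the unused tail. The trick in isolation (a sketch; assumes 'alignment' is a power of two and a multiple of the system page size, and uses MAP_PRIVATE where the code above needs MAP_SHARED for its msync-based uncommit):

    #include <sys/mman.h>
    #include <stdint.h>
    #include <stddef.h>

    // Return 'size' bytes of anonymous memory aligned to 'alignment', or NULL.
    static void* mmap_aligned(size_t size, size_t alignment) {
      const size_t extra = size + alignment;
      char* raw = (char*) mmap(NULL, extra, PROT_READ | PROT_WRITE,
                               MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
      if (raw == (char*) MAP_FAILED) return NULL;
      char* aligned = (char*) (((uintptr_t) raw + alignment - 1) & ~((uintptr_t) alignment - 1));
      const size_t waste_pre  = (size_t) (aligned - raw);
      const size_t waste_post = extra - waste_pre - size;
      if (waste_pre  > 0) munmap(raw, waste_pre);              // trim misaligned head
      if (waste_post > 0) munmap(aligned + size, waste_post);  // trim unused tail
      return aligned;
    }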
+
+static bool release_mmaped_memory(char* addr, size_t size) {
+ assert0(is_aligned_to(addr, os::vm_page_size()));
+ assert0(is_aligned_to(size, os::vm_page_size()));
+
+ trcVerbose("release_mmaped_memory [" PTR_FORMAT " - " PTR_FORMAT "].",
+ addr, addr + size - 1);
+ bool rc = false;
+
+ if (::munmap(addr, size) != 0) {
+ trcVerbose("failed (%d)\n", errno);
+ rc = false;
+ } else {
+ trcVerbose("ok.");
+ rc = true;
+ }
+
+ return rc;
+}
+
+static bool uncommit_mmaped_memory(char* addr, size_t size) {
+
+ assert0(is_aligned_to(addr, os::vm_page_size()));
+ assert0(is_aligned_to(size, os::vm_page_size()));
+
+ trcVerbose("uncommit_mmaped_memory [" PTR_FORMAT " - " PTR_FORMAT "].",
+ addr, addr + size - 1);
+ bool rc = false;
+
+ // Uncommit mmap memory with msync MS_INVALIDATE.
+ if (::msync(addr, size, MS_INVALIDATE) != 0) {
+ trcVerbose("failed (%d)\n", errno);
+ rc = false;
+ } else {
+ trcVerbose("ok.");
+ rc = true;
+ }
+
+ return rc;
+}
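The split between release_mmaped_memory() and uncommit_mmaped_memory() mirrors two different intentions: munmap() gives the address range back, while msync(MS_INVALIDATE) merely discards the backing pages and keeps the reservation, which is why the mappings are created MAP_SHARED in the first place (see reserve_mmaped_memory() above). A reduced sketch of the pairing (AIX semantics for MS_INVALIDATE on anonymous shared mappings assumed):

    #include <sys/mman.h>
    #include <stddef.h>

    // Reserve: MAP_SHARED keeps the msync(MS_INVALIDATE) option open.
    static char* reserve(size_t size) {
      void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_SHARED, -1, 0);
      return (p == MAP_FAILED) ? NULL : (char*) p;
    }

    // Uncommit: discard the pages, keep the address range reserved.
    static bool uncommit(char* addr, size_t size) {
      return msync(addr, size, MS_INVALIDATE) == 0;
    }

    // Release: the address range itself goes away.
    static bool release(char* addr, size_t size) {
      return munmap(addr, size) == 0;
    }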
// End: shared memory bookkeeping
////////////////////////////////////////////////////////////////////////////////////////////////////
int os::vm_page_size() {
- // Seems redundant as all get out
+ // Seems redundant as all get out.
assert(os::Aix::page_size() != -1, "must call os::init");
return os::Aix::page_size();
}
@@ -2146,91 +2280,76 @@
return os::Aix::page_size();
}
-int os::Aix::commit_memory_impl(char* addr, size_t size, bool exec) {
-
- // Commit is a noop. There is no explicit commit
- // needed on AIX. Memory is committed when touched.
- //
- // Debug : check address range for validity
-#ifdef ASSERT
- LOCK_SHMBK
- ShmBkBlock* const block = shmbk_find_by_containing_address(addr);
- if (!block) {
- fprintf(stderr, "invalid pointer: " INTPTR_FORMAT "\n", addr);
- shmbk_dump_info();
- assert(false, "invalid pointer");
- return false;
- } else if (!block->containsRange(addr, size)) {
- fprintf(stderr, "invalid range: " INTPTR_FORMAT " .. " INTPTR_FORMAT "\n", addr, addr + size);
- shmbk_dump_info();
- assert(false, "invalid range");
- return false;
- }
- UNLOCK_SHMBK
-#endif // ASSERT
-
- return 0;
-}
-
-bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
- return os::Aix::commit_memory_impl(addr, size, exec) == 0;
-}
+#ifdef PRODUCT
+static void warn_fail_commit_memory(char* addr, size_t size, bool exec,
+ int err) {
+ warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT
+ ", %d) failed; error='%s' (errno=%d)", addr, size, exec,
+ strerror(err), err);
+}
+#endif
void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec,
const char* mesg) {
assert(mesg != NULL, "mesg must be specified");
- os::Aix::commit_memory_impl(addr, size, exec);
-}
-
-int os::Aix::commit_memory_impl(char* addr, size_t size,
- size_t alignment_hint, bool exec) {
- return os::Aix::commit_memory_impl(addr, size, exec);
-}
-
-bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
- bool exec) {
- return os::Aix::commit_memory_impl(addr, size, alignment_hint, exec) == 0;
+ if (!pd_commit_memory(addr, size, exec)) {
+ // Add extra info in product mode for vm_exit_out_of_memory():
+ PRODUCT_ONLY(warn_fail_commit_memory(addr, size, exec, errno);)
+ vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
+ }
+}
+
+bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
+
+ assert0(is_aligned_to(addr, os::vm_page_size()));
+ assert0(is_aligned_to(size, os::vm_page_size()));
+
+ vmembk_t* const vmi = vmembk_find(addr);
+ assert0(vmi);
+ vmi->assert_is_valid_subrange(addr, size);
+
+ trcVerbose("commit_memory [" PTR_FORMAT " - " PTR_FORMAT "].", addr, addr + size - 1);
+
+ return true;
+}
+
+bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint, bool exec) {
+ return pd_commit_memory(addr, size, exec);
}
void os::pd_commit_memory_or_exit(char* addr, size_t size,
size_t alignment_hint, bool exec,
const char* mesg) {
- os::Aix::commit_memory_impl(addr, size, alignment_hint, exec);
+ // Alignment_hint is ignored on this OS.
+ pd_commit_memory_or_exit(addr, size, exec, mesg);
}
bool os::pd_uncommit_memory(char* addr, size_t size) {
-
- // Delegate to ShmBkBlock class which knows how to uncommit its memory.
-
- bool rc = false;
- LOCK_SHMBK
- ShmBkBlock* const block = shmbk_find_by_containing_address(addr);
- if (!block) {
- fprintf(stderr, "invalid pointer: 0x%p.\n", addr);
- shmbk_dump_info();
- assert(false, "invalid pointer");
- return false;
- } else if (!block->containsRange(addr, size)) {
- fprintf(stderr, "invalid range: 0x%p .. 0x%p.\n", addr, addr + size);
- shmbk_dump_info();
- assert(false, "invalid range");
- return false;
- }
- rc = block->disclaim(addr, size);
- UNLOCK_SHMBK
-
- if (Verbose && !rc) {
- warning("failed to disclaim 0x%p .. 0x%p (0x%llX bytes).", addr, addr + size, size);
- }
- return rc;
+ assert0(is_aligned_to(addr, os::vm_page_size()));
+ assert0(is_aligned_to(size, os::vm_page_size()));
+
+ // Dynamically do different things for mmap/shmat.
+ const vmembk_t* const vmi = vmembk_find(addr);
+ assert0(vmi);
+ vmi->assert_is_valid_subrange(addr, size);
+
+ if (vmi->type == VMEM_SHMATED) {
+ return uncommit_shmated_memory(addr, size);
+ } else {
+ return uncommit_mmaped_memory(addr, size);
+ }
}
bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
- return os::guard_memory(addr, size);
+ // Do not call this; no need to commit stack pages on AIX.
+ ShouldNotReachHere();
+ return true;
}
bool os::remove_stack_guard_pages(char* addr, size_t size) {
- return os::unguard_memory(addr, size);
+ // Do not call this; no need to commit stack pages on AIX.
+ ShouldNotReachHere();
+ return true;
}
void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
@@ -2273,355 +2392,75 @@
return end;
}
-// Flags for reserve_shmatted_memory:
-#define RESSHM_WISHADDR_OR_FAIL 1
-#define RESSHM_TRY_16M_PAGES 2
-#define RESSHM_16M_PAGES_OR_FAIL 4
-
-// Result of reserve_shmatted_memory:
-struct shmatted_memory_info_t {
- char* addr;
- size_t pagesize;
- bool pinned;
-};
-
-// Reserve a section of shmatted memory.
-// params:
-// bytes [in]: size of memory, in bytes
-// requested_addr [in]: wish address.
-// NULL = no wish.
-// If RESSHM_WISHADDR_OR_FAIL is set in flags and wish address cannot
-// be obtained, function will fail. Otherwise wish address is treated as hint and
-// another pointer is returned.
-// flags [in]: some flags. Valid flags are:
-// RESSHM_WISHADDR_OR_FAIL - fail if wish address is given and cannot be obtained.
-// RESSHM_TRY_16M_PAGES - try to allocate from 16M page pool
-// (requires UseLargePages and Use16MPages)
-// RESSHM_16M_PAGES_OR_FAIL - if you cannot allocate from 16M page pool, fail.
-// Otherwise any other page size will do.
-// p_info [out] : holds information about the created shared memory segment.
-static bool reserve_shmatted_memory(size_t bytes, char* requested_addr, int flags, shmatted_memory_info_t* p_info) {
-
- assert(p_info, "parameter error");
-
- // init output struct.
- p_info->addr = NULL;
-
- // neither should we be here for EXTSHM=ON.
- if (os::Aix::extshm()) {
- ShouldNotReachHere();
- }
-
- // extract flags. sanity checks.
- const bool wishaddr_or_fail =
- flags & RESSHM_WISHADDR_OR_FAIL;
- const bool try_16M_pages =
- flags & RESSHM_TRY_16M_PAGES;
- const bool f16M_pages_or_fail =
- flags & RESSHM_16M_PAGES_OR_FAIL;
-
- // first check: if a wish address is given and it is mandatory, but not aligned to segment boundary,
- // shmat will fail anyway, so save some cycles by failing right away
- if (requested_addr && ((uintptr_t)requested_addr % SIZE_256M == 0)) {
- if (wishaddr_or_fail) {
- return false;
- } else {
- requested_addr = NULL;
- }
- }
-
- char* addr = NULL;
-
- // Align size of shm up to the largest possible page size, to avoid errors later on when we try to change
- // pagesize dynamically.
- const size_t size = align_size_up(bytes, SIZE_16M);
-
- // reserve the shared segment
- int shmid = shmget(IPC_PRIVATE, size, IPC_CREAT | S_IRUSR | S_IWUSR);
- if (shmid == -1) {
- warning("shmget(.., %lld, ..) failed (errno: %d).", size, errno);
- return false;
- }
-
- // Important note:
- // It is very important that we, upon leaving this function, do not leave a shm segment alive.
- // We must right after attaching it remove it from the system. System V shm segments are global and
- // survive the process.
- // So, from here on: Do not assert. Do not return. Always do a "goto cleanup_shm".
-
- // try forcing the page size
- size_t pagesize = -1; // unknown so far
-
- if (UseLargePages) {
-
- struct shmid_ds shmbuf;
- memset(&shmbuf, 0, sizeof(shmbuf));
-
- // First, try to take from 16M page pool if...
- if (os::Aix::can_use_16M_pages() // we can ...
- && Use16MPages // we are not explicitly forbidden to do so (-XX:-Use16MPages)..
- && try_16M_pages) { // caller wants us to.
- shmbuf.shm_pagesize = SIZE_16M;
- if (shmctl(shmid, SHM_PAGESIZE, &shmbuf) == 0) {
- pagesize = SIZE_16M;
- } else {
- warning("Failed to allocate %d 16M pages. 16M page pool might be exhausted. (shmctl failed with %d)",
- size / SIZE_16M, errno);
- if (f16M_pages_or_fail) {
- goto cleanup_shm;
- }
- }
- }
-
- // Nothing yet? Try setting 64K pages. Note that I never saw this fail, but in theory it might,
- // because the 64K page pool may also be exhausted.
- if (pagesize == -1) {
- shmbuf.shm_pagesize = SIZE_64K;
- if (shmctl(shmid, SHM_PAGESIZE, &shmbuf) == 0) {
- pagesize = SIZE_64K;
- } else {
- warning("Failed to allocate %d 64K pages. (shmctl failed with %d)",
- size / SIZE_64K, errno);
- // here I give up. leave page_size -1 - later, after attaching, we will query the
- // real page size of the attached memory. (in theory, it may be something different
- // from 4K if LDR_CNTRL SHM_PSIZE is set)
- }
- }
- }
-
- // sanity point
- assert(pagesize == -1 || pagesize == SIZE_16M || pagesize == SIZE_64K, "wrong page size");
-
- // Now attach the shared segment.
- addr = (char*) shmat(shmid, requested_addr, 0);
- if (addr == (char*)-1) {
- // How to handle attach failure:
- // If it failed for a specific wish address, tolerate this: in that case, if wish address was
- // mandatory, fail, if not, retry anywhere.
- // If it failed for any other reason, treat that as fatal error.
- addr = NULL;
- if (requested_addr) {
- if (wishaddr_or_fail) {
- goto cleanup_shm;
- } else {
- addr = (char*) shmat(shmid, NULL, 0);
- if (addr == (char*)-1) { // fatal
- addr = NULL;
- warning("shmat failed (errno: %d)", errno);
- goto cleanup_shm;
- }
- }
- } else { // fatal
- addr = NULL;
- warning("shmat failed (errno: %d)", errno);
- goto cleanup_shm;
- }
- }
-
- // sanity point
- assert(addr && addr != (char*) -1, "wrong address");
-
- // after successful Attach remove the segment - right away.
- if (::shmctl(shmid, IPC_RMID, NULL) == -1) {
- warning("shmctl(%u, IPC_RMID) failed (%d)\n", shmid, errno);
- guarantee(false, "failed to remove shared memory segment!");
- }
- shmid = -1;
-
- // query the real page size. In case setting the page size did not work (see above), the system
- // may have given us something other then 4K (LDR_CNTRL)
- {
- const size_t real_pagesize = os::Aix::query_pagesize(addr);
- if (pagesize != -1) {
- assert(pagesize == real_pagesize, "unexpected pagesize after shmat");
- } else {
- pagesize = real_pagesize;
- }
- }
-
- // Now register the reserved block with internal book keeping.
- LOCK_SHMBK
- const bool pinned = pagesize >= SIZE_16M ? true : false;
- ShmBkShmatedBlock* const p_block = new ShmBkShmatedBlock(AddrRange(addr, size), pagesize, pinned);
- assert(p_block, "");
- shmbk_register(p_block);
- UNLOCK_SHMBK
-
-cleanup_shm:
-
- // if we have not done so yet, remove the shared memory segment. This is very important.
- if (shmid != -1) {
- if (::shmctl(shmid, IPC_RMID, NULL) == -1) {
- warning("shmctl(%u, IPC_RMID) failed (%d)\n", shmid, errno);
- guarantee(false, "failed to remove shared memory segment!");
- }
- shmid = -1;
- }
-
- // trace
- if (Verbose && !addr) {
- if (requested_addr != NULL) {
- warning("failed to shm-allocate 0x%llX bytes at wish address 0x%p.", size, requested_addr);
- } else {
- warning("failed to shm-allocate 0x%llX bytes at any address.", size);
- }
- }
-
- // hand info to caller
- if (addr) {
- p_info->addr = addr;
- p_info->pagesize = pagesize;
- p_info->pinned = pagesize == SIZE_16M ? true : false;
- }
-
- // sanity test:
- if (requested_addr && addr && wishaddr_or_fail) {
- guarantee(addr == requested_addr, "shmat error");
- }
-
- // just one more test to really make sure we have no dangling shm segments.
- guarantee(shmid == -1, "dangling shm segments");
-
- return addr ? true : false;
-
-} // end: reserve_shmatted_memory
-
-// Reserve memory using mmap. Behaves the same as reserve_shmatted_memory():
-// will return NULL in case of an error.
-static char* reserve_mmaped_memory(size_t bytes, char* requested_addr) {
-
- // if a wish address is given, but not aligned to 4K page boundary, mmap will fail.
- if (requested_addr && ((uintptr_t)requested_addr % os::vm_page_size() != 0)) {
- warning("Wish address 0x%p not aligned to page boundary.", requested_addr);
- return NULL;
- }
-
- const size_t size = align_size_up(bytes, SIZE_4K);
-
- // Note: MAP_SHARED (instead of MAP_PRIVATE) needed to be able to
- // msync(MS_INVALIDATE) (see os::uncommit_memory)
- int flags = MAP_ANONYMOUS | MAP_SHARED;
-
- // MAP_FIXED is needed to enforce requested_addr - manpage is vague about what
- // it means if wishaddress is given but MAP_FIXED is not set.
- //
- // Note however that this changes semantics in SPEC1170 mode insofar as MAP_FIXED
- // clobbers the address range, which is probably not what the caller wants. That's
- // why I assert here (again) that the SPEC1170 compat mode is off.
- // If we want to be able to run under SPEC1170, we have to do some porting and
- // testing.
- if (requested_addr != NULL) {
- assert(!os::Aix::xpg_sus_mode(), "SPEC1170 mode not allowed.");
- flags |= MAP_FIXED;
- }
-
- char* addr = (char*)::mmap(requested_addr, size, PROT_READ|PROT_WRITE|PROT_EXEC, flags, -1, 0);
-
- if (addr == MAP_FAILED) {
- // attach failed: tolerate for specific wish addresses. Not being able to attach
- // anywhere is a fatal error.
- if (requested_addr == NULL) {
- // It's ok to fail here if the machine has not enough memory.
- warning("mmap(NULL, 0x%llX, ..) failed (%d)", size, errno);
- }
- addr = NULL;
- goto cleanup_mmap;
- }
-
- // If we did request a specific address and that address was not available, fail.
- if (addr && requested_addr) {
- guarantee(addr == requested_addr, "unexpected");
- }
-
- // register this mmap'ed segment with book keeping
- LOCK_SHMBK
- ShmBkMappedBlock* const p_block = new ShmBkMappedBlock(AddrRange(addr, size));
- assert(p_block, "");
- shmbk_register(p_block);
- UNLOCK_SHMBK
-
-cleanup_mmap:
-
- // trace
- if (Verbose) {
- if (addr) {
- fprintf(stderr, "mmap-allocated 0x%p .. 0x%p (0x%llX bytes)\n", addr, addr + bytes, bytes);
- }
- else {
- if (requested_addr != NULL) {
- warning("failed to mmap-allocate 0x%llX bytes at wish address 0x%p.", bytes, requested_addr);
- } else {
- warning("failed to mmap-allocate 0x%llX bytes at any address.", bytes);
- }
- }
- }
-
- return addr;
-
-} // end: reserve_mmaped_memory
-
// Reserves and attaches a shared memory segment.
// Will assert if a wish address is given and could not be obtained.
char* os::pd_reserve_memory(size_t bytes, char* requested_addr, size_t alignment_hint) {
- return os::attempt_reserve_memory_at(bytes, requested_addr);
+
+ // All other Unices do a mmap(MAP_FIXED) if the addr is given,
+ // thereby clobbering old mappings at that place. That is probably
+ // not intended, never used, and almost certainly an error were it
+ // ever used this way (to attach at a specified address without
+ // clobbering old mappings, an alternate API exists:
+ // os::attempt_reserve_memory_at()).
+ // Instead of mimicking the dangerous coding of the other platforms, here I
+ // just ignore the requested address (release) or assert (debug).
+ assert0(requested_addr == NULL);
+
+ // Always round to os::vm_page_size(), which may be larger than 4K.
+ bytes = align_size_up(bytes, os::vm_page_size());
+ const size_t alignment_hint0 =
+ alignment_hint ? align_size_up(alignment_hint, os::vm_page_size()) : 0;
+
+ // In 4K mode always use mmap.
+ // In 64K mode allocate small sizes with mmap, large ones with 64K shmatted.
+ if (os::vm_page_size() == SIZE_4K) {
+ return reserve_mmaped_memory(bytes, requested_addr, alignment_hint);
+ } else {
+ if (bytes >= Use64KPagesThreshold) {
+ return reserve_shmated_memory(bytes, requested_addr, alignment_hint);
+ } else {
+ return reserve_mmaped_memory(bytes, requested_addr, alignment_hint);
+ }
+ }
}
bool os::pd_release_memory(char* addr, size_t size) {
- // delegate to ShmBkBlock class which knows how to uncommit its memory.
+ // Dynamically do different things for mmap/shmat.
+ vmembk_t* const vmi = vmembk_find(addr);
+ assert0(vmi);
+
+ // Always round to os::vm_page_size(), which may be larger than 4K.
+ size = align_size_up(size, os::vm_page_size());
+ addr = (char *)align_ptr_up(addr, os::vm_page_size());
bool rc = false;
- LOCK_SHMBK
- ShmBkBlock* const block = shmbk_find_by_containing_address(addr);
- if (!block) {
- fprintf(stderr, "invalid pointer: 0x%p.\n", addr);
- shmbk_dump_info();
- assert(false, "invalid pointer");
- return false;
+ bool remove_bookkeeping = false;
+ if (vmi->type == VMEM_SHMATED) {
+ // For shmatted memory, we do:
+ // - If user wants to release the whole range, release the memory (shmdt).
+ // - If user only wants to release a partial range, uncommit (disclaim) that
+ //   range. That way, at least, we do not use the memory anymore (but it still
+ //   occupies page table space).
+ vmi->assert_is_valid_subrange(addr, size);
+ if (addr == vmi->addr && size == vmi->size) {
+ rc = release_shmated_memory(addr, size);
+ remove_bookkeeping = true;
+ } else {
+ rc = uncommit_shmated_memory(addr, size);
}
- else if (!block->isSameRange(addr, size)) {
- if (block->getType() == ShmBkBlock::MMAP) {
- // Release only the same range or a the beginning or the end of a range.
- if (block->base() == addr && size < block->size()) {
- ShmBkMappedBlock* const b = new ShmBkMappedBlock(AddrRange(block->base() + size, block->size() - size));
- assert(b, "");
- shmbk_register(b);
- block->setAddrRange(AddrRange(addr, size));
- }
- else if (addr > block->base() && addr + size == block->base() + block->size()) {
- ShmBkMappedBlock* const b = new ShmBkMappedBlock(AddrRange(block->base(), block->size() - size));
- assert(b, "");
- shmbk_register(b);
- block->setAddrRange(AddrRange(addr, size));
- }
- else {
- fprintf(stderr, "invalid mmap range: 0x%p .. 0x%p.\n", addr, addr + size);
- shmbk_dump_info();
- assert(false, "invalid mmap range");
- return false;
- }
- }
- else {
- // Release only the same range. No partial release allowed.
- // Soften the requirement a bit, because the user may think he owns a smaller size
- // than the block is due to alignment etc.
- if (block->base() != addr || block->size() < size) {
- fprintf(stderr, "invalid shmget range: 0x%p .. 0x%p.\n", addr, addr + size);
- shmbk_dump_info();
- assert(false, "invalid shmget range");
- return false;
- }
- }
- }
- rc = block->release();
- assert(rc, "release failed");
- // remove block from bookkeeping
- shmbk_unregister(block);
- delete block;
- UNLOCK_SHMBK
-
- if (!rc) {
- warning("failed to released %lu bytes at 0x%p", size, addr);
+ } else {
+ // User may unmap partial regions but region has to be fully contained.
+#ifdef ASSERT
+ vmi->assert_is_valid_subrange(addr, size);
+#endif
+ rc = release_mmaped_memory(addr, size);
+ remove_bookkeeping = true;
+ }
+
+ // update bookkeeping
+ if (rc && remove_bookkeeping) {
+ vmembk_remove(vmi);
}
return rc;
@@ -2702,46 +2541,8 @@
// Enable large page support if OS allows that.
void os::large_page_init() {
-
- // Note: os::Aix::query_multipage_support must run first.
-
- if (!UseLargePages) {
- return;
- }
-
- if (!Aix::can_use_64K_pages()) {
- assert(!Aix::can_use_16M_pages(), "64K is a precondition for 16M.");
- UseLargePages = false;
- return;
- }
-
- if (!Aix::can_use_16M_pages() && Use16MPages) {
- fprintf(stderr, "Cannot use 16M pages. Please ensure that there is a 16M page pool "
- " and that the VM runs with CAP_BYPASS_RAC_VMM and CAP_PROPAGATE capabilities.\n");
- }
-
- // Do not report 16M page alignment as part of os::_page_sizes if we are
- // explicitly forbidden from using 16M pages. Doing so would increase the
- // alignment the garbage collector calculates with, slightly increasing
- // heap usage. We should only pay for 16M alignment if we really want to
- // use 16M pages.
- if (Use16MPages && Aix::can_use_16M_pages()) {
- _large_page_size = SIZE_16M;
- _page_sizes[0] = SIZE_16M;
- _page_sizes[1] = SIZE_64K;
- _page_sizes[2] = SIZE_4K;
- _page_sizes[3] = 0;
- } else if (Aix::can_use_64K_pages()) {
- _large_page_size = SIZE_64K;
- _page_sizes[0] = SIZE_64K;
- _page_sizes[1] = SIZE_4K;
- _page_sizes[2] = 0;
- }
-
- if (Verbose) {
- ("Default large page size is 0x%llX.", _large_page_size);
- }
-} // end: os::large_page_init()
+ return; // Nothing to do. See query_multipage_support and friends.
+}
char* os::reserve_memory_special(size_t bytes, size_t alignment, char* req_addr, bool exec) {
// "exec" is passed in but not used. Creating the shared image for
@@ -2751,7 +2552,7 @@
}
bool os::release_memory_special(char* base, size_t bytes) {
- // detaching the SHM segment will also delete it, see reserve_memory_special()
+ // Detaching the SHM segment will also delete it, see reserve_memory_special().
Unimplemented();
return false;
}
@@ -2761,40 +2562,32 @@
}
bool os::can_commit_large_page_memory() {
- // Well, sadly we cannot commit anything at all (see comment in
- // os::commit_memory) but we claim to so we can make use of large pages
- return true;
+ // Does not matter, we do not support huge pages.
+ return false;
}
bool os::can_execute_large_page_memory() {
- // We can do that
- return true;
+ // Does not matter, we do not support huge pages.
+ return false;
}
// Reserve memory at an arbitrary address, only if that area is
// available (and not reserved for something else).
char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
-
- bool use_mmap = false;
-
- // mmap: smaller graining, no large page support
- // shm: large graining (256M), large page support, limited number of shm segments
- //
- // Prefer mmap wherever we either do not need large page support or have OS limits
-
- if (!UseLargePages || bytes < SIZE_16M) {
- use_mmap = true;
- }
-
char* addr = NULL;
- if (use_mmap) {
- addr = reserve_mmaped_memory(bytes, requested_addr);
+
+ // Always round to os::vm_page_size(), which may be larger than 4K.
+ bytes = align_size_up(bytes, os::vm_page_size());
+
+ // In 4K mode always use mmap.
+ // In 64K mode allocate small sizes with mmap, large ones with 64K shmatted.
+ if (os::vm_page_size() == SIZE_4K) {
+ return reserve_mmaped_memory(bytes, requested_addr, 0);
} else {
- // shmat: wish address is mandatory, and do not try 16M pages here.
- shmatted_memory_info_t info;
- const int flags = RESSHM_WISHADDR_OR_FAIL;
- if (reserve_shmatted_memory(bytes, requested_addr, flags, &info)) {
- addr = info.addr;
+ if (bytes >= Use64KPagesThreshold) {
+ return reserve_shmated_memory(bytes, requested_addr, 0);
+ } else {
+ return reserve_mmaped_memory(bytes, requested_addr, 0);
}
}
@@ -3629,18 +3422,89 @@
// This is called _before_ the most of global arguments have been parsed.
void os::init(void) {
// This is basic, we want to know if that ever changes.
- // (shared memory boundary is supposed to be a 256M aligned)
+ // (Shared memory boundary is supposed to be 256M aligned.)
assert(SHMLBA == ((uint64_t)0x10000000ULL)/*256M*/, "unexpected");
// First off, we need to know whether we run on AIX or PASE, and
// the OS level we run on.
os::Aix::initialize_os_info();
- // Scan environment (SPEC1170 behaviour, etc)
+ // Scan environment (SPEC1170 behaviour, etc).
os::Aix::scan_environment();
// Check which pages are supported by AIX.
- os::Aix::query_multipage_support();
+ query_multipage_support();
+
+ // Act like we only have one page size by eliminating corner cases which
+ // we did not support very well anyway.
+ // We have two input conditions:
+ // 1) Data segment page size. This is controlled by linker setting (datapsize) on the
+ // launcher, and/or by LDR_CNTRL environment variable. The latter overrules the linker
+ // setting.
+ // Data segment page size is important for us because it defines the thread stack page
+ // size, which is needed for guard page handling, stack banging etc.
+ // 2) The ability to allocate 64k pages dynamically. If this is a given, java heap can
+ // and should be allocated with 64k pages.
+ //
+ // So, we do the following:
+ // LDR_CNTRL can_use_64K_pages_dynamically what we do remarks
+ // 4K no 4K old systems (aix 5.2, as/400 v5r4) or new systems with AME activated
+ // 4k yes 64k (treat 4k stacks as 64k) different loader than java and standard settings
+ // 64k no --- AIX 5.2 ? ---
+ // 64k yes 64k new systems and standard java loader (we set datapsize=64k when linking)
+
+ // We explicitly leave no option to change page size, because only upgrading would work,
+ // not downgrading (if stack page size is 64k you cannot pretend it's 4k).
+
+ if (g_multipage_support.datapsize == SIZE_4K) {
+ // datapsize = 4K. Data segment, thread stacks are 4K paged.
+ if (g_multipage_support.can_use_64K_pages) {
+ // .. but we are able to use 64K pages dynamically.
+ // This would be typical for java launchers which are not linked
+ // with datapsize=64K (like, any other launcher but our own).
+ //
+ // In this case it would be smart to allocate the java heap with 64K
+ // to get the performance benefit, and to fake 64k pages for the
+ // data segment (when dealing with thread stacks).
+ //
+ // However, leave a possibility to downgrade to 4K, using
+ // -XX:-Use64KPages.
+ if (Use64KPages) {
+ trcVerbose("64K page mode (faked for data segment)");
+ Aix::_page_size = SIZE_64K;
+ } else {
+ trcVerbose("4K page mode (Use64KPages=off)");
+ Aix::_page_size = SIZE_4K;
+ }
+ } else {
+ // .. and not able to allocate 64k pages dynamically. Here, just
+ // fall back to 4K paged mode and use mmap for everything.
+ trcVerbose("4K page mode");
+ Aix::_page_size = SIZE_4K;
+ FLAG_SET_ERGO(bool, Use64KPages, false);
+ }
+ } else {
+ // datapsize = 64k. Data segment, thread stacks are 64k paged.
+ // This normally means that we can allocate 64k pages dynamically.
+ // (There is one special case where this may be false: EXTSHM=on,
+ // but we decided not to support that mode.)
+ assert0(g_multipage_support.can_use_64K_pages);
+ Aix::_page_size = SIZE_64K;
+ trcVerbose("64K page mode");
+ FLAG_SET_ERGO(bool, Use64KPages, true);
+ }
+
+ // Short-wire stack page size to base page size; if that works, we just remove
+ // that stack page size altogether.
+ Aix::_stack_page_size = Aix::_page_size;
+
+ // For now UseLargePages is just ignored.
+ FLAG_SET_ERGO(bool, UseLargePages, false);
+ _page_sizes[0] = 0;
+ _large_page_size = -1;
+
+ // debug trace
+ trcVerbose("os::vm_page_size %s\n", describe_pagesize(os::vm_page_size()));
// Next, we need to initialize libo4 and libperfstat libraries.
if (os::Aix::on_pase()) {
@@ -3658,34 +3522,6 @@
// need libperfstat etc.
os::Aix::initialize_system_info();
- // Initialize large page support.
- if (UseLargePages) {
- os::large_page_init();
- if (!UseLargePages) {
- // initialize os::_page_sizes
- _page_sizes[0] = Aix::page_size();
- _page_sizes[1] = 0;
- if (Verbose) {
- fprintf(stderr, "Large Page initialization failed: setting UseLargePages=0.\n");
- }
- }
- } else {
- // initialize os::_page_sizes
- _page_sizes[0] = Aix::page_size();
- _page_sizes[1] = 0;
- }
-
- // debug trace
- if (Verbose) {
- fprintf(stderr, "os::vm_page_size 0x%llX\n", os::vm_page_size());
- fprintf(stderr, "os::large_page_size 0x%llX\n", os::large_page_size());
- fprintf(stderr, "os::_page_sizes = ( ");
- for (int i = 0; _page_sizes[i]; i ++) {
- fprintf(stderr, " %s ", describe_pagesize(_page_sizes[i]));
- }
- fprintf(stderr, ")\n");
- }
-
_initial_pid = getpid();
clock_tics_per_sec = sysconf(_SC_CLK_TCK);
@@ -3699,6 +3535,15 @@
initial_time_count = os::elapsed_counter();
pthread_mutex_init(&dl_mutex, NULL);
+
+ // If the pagesize of the VM is greater than 8K determine the appropriate
+ // number of initial guard pages. The user can change this with the
+ // command line arguments, if needed.
+ if (vm_page_size() > (int)Aix::vm_default_page_size()) {
+ StackYellowPages = 1;
+ StackRedPages = 1;
+ StackShadowPages = round_to((StackShadowPages*Aix::vm_default_page_size()), vm_page_size()) / vm_page_size();
+ }
}
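To make the guard page scaling above concrete with purely illustrative numbers (the actual defaults are platform flags and may differ): with a 4K vm_default_page_size(), a 64K vm_page_size() and a hypothetical StackShadowPages default of 20, the shadow zone of 20 * 4K = 80K is rounded up to the next 64K multiple, 128K, and expressed in 64K pages again, so StackShadowPages becomes 2, while StackYellowPages and StackRedPages drop to a single 64K page each.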
// This is called _after_ the global arguments have been parsed.
@@ -3717,7 +3562,7 @@
const int prot = PROT_READ;
const int flags = MAP_PRIVATE|MAP_ANONYMOUS;
- // use optimized addresses for the polling page,
+ // Use optimized addresses for the polling page,
// e.g. map it to a special 32-bit address.
if (OptimizePollingPageLocation) {
// architecture-specific list of address wishes:
@@ -3739,7 +3584,7 @@
// iterate over the list of address wishes:
for (int i=0; i<address_wishes_length; i++) {
- // try to map with current address wish.
+ // Try to map with current address wish.
// AIX: AIX needs MAP_FIXED if we provide an address and mmap will
// fail if the address is already mapped.
map_address = (address) ::mmap(address_wishes[i] - (ssize_t)page_size,
@@ -3752,7 +3597,7 @@
}
if (map_address + (ssize_t)page_size == address_wishes[i]) {
- // map succeeded and map_address is at wished address, exit loop.
+ // Map succeeded and map_address is at wished address, exit loop.
break;
}
@@ -3761,7 +3606,7 @@
::munmap(map_address, map_size);
map_address = (address) MAP_FAILED;
}
- // map failed, continue loop.
+ // Map failed, continue loop.
}
} // end OptimizePollingPageLocation
@@ -3777,8 +3622,9 @@
os::set_memory_serialize_page(mem_serialize_page);
#ifndef PRODUCT
- if (Verbose && PrintMiscellaneous)
+ if (Verbose && PrintMiscellaneous) {
tty->print("[Memory Serialize Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page);
+ }
#endif
}
@@ -3797,16 +3643,18 @@
// Add in 2*BytesPerWord times page size to account for VM stack during
// class initialization depending on 32 or 64 bit VM.
os::Aix::min_stack_allowed = MAX2(os::Aix::min_stack_allowed,
- (size_t)(StackYellowPages+StackRedPages+StackShadowPages +
- 2*BytesPerWord COMPILER2_PRESENT(+1)) * Aix::page_size());
+ (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Aix::page_size() +
+ (2*BytesPerWord COMPILER2_PRESENT(+1)) * Aix::vm_default_page_size());
+
+ os::Aix::min_stack_allowed = align_size_up(os::Aix::min_stack_allowed, os::Aix::page_size());
size_t threadStackSizeInBytes = ThreadStackSize * K;
if (threadStackSizeInBytes != 0 &&
threadStackSizeInBytes < os::Aix::min_stack_allowed) {
- tty->print_cr("\nThe stack size specified is too small, "
- "Specify at least %dk",
- os::Aix::min_stack_allowed / K);
- return JNI_ERR;
+ tty->print_cr("\nThe stack size specified is too small, "
+ "Specify at least %dk",
+ os::Aix::min_stack_allowed / K);
+ return JNI_ERR;
}
// Make the stack size a multiple of the page size so that
@@ -3817,7 +3665,7 @@
Aix::libpthread_init();
if (MaxFDLimit) {
- // set the number of file descriptors to max. print out error
+    // Set the number of file descriptors to max. Print out an error
// if getrlimit/setrlimit fails but continue regardless.
struct rlimit nbr_files;
int status = getrlimit(RLIMIT_NOFILE, &nbr_files);
@@ -3835,12 +3683,12 @@
}
if (PerfAllowAtExitRegistration) {
- // only register atexit functions if PerfAllowAtExitRegistration is set.
- // atexit functions can be delayed until process exit time, which
+ // Only register atexit functions if PerfAllowAtExitRegistration is set.
+ // Atexit functions can be delayed until process exit time, which
// can be problematic for embedded VM situations. Embedded VMs should
// call DestroyJavaVM() to assure that VM resources are released.
- // note: perfMemory_exit_helper atexit function may be removed in
+ // Note: perfMemory_exit_helper atexit function may be removed in
// the future if the appropriate cleanup code can be added to the
// VM_Exit VMOperation's doit method.
if (atexit(perfMemory_exit_helper) != 0) {
@@ -4162,8 +4010,10 @@
if (read_only) {
prot = PROT_READ;
+ flags = MAP_SHARED;
} else {
prot = PROT_READ | PROT_WRITE;
+ flags = MAP_PRIVATE;
}
if (allow_exec) {
@@ -4174,7 +4024,12 @@
flags |= MAP_FIXED;
}
- char* mapped_address = (char*)mmap(addr, (size_t)bytes, prot, flags,
+ // Allow anonymous mappings if 'fd' is -1.
+ if (fd == -1) {
+ flags |= MAP_ANONYMOUS;
+ }
+
+ char* mapped_address = (char*)::mmap(addr, (size_t)bytes, prot, flags,
fd, file_offset);
if (mapped_address == MAP_FAILED) {
return NULL;
@@ -4432,7 +4287,7 @@
if (Verbose) {
fprintf(stderr, "EXTSHM=%s.\n", p ? p : "<unset>");
}
- if (p && strcmp(p, "ON") == 0) {
+ if (p && strcasecmp(p, "ON") == 0) {
fprintf(stderr, "Unsupported setting: EXTSHM=ON. Large Page support will be disabled.\n");
_extshm = 1;
} else {
@@ -4493,16 +4348,13 @@
/////////////////////////////////////////////////////////////////////////////
// thread stack
-// function to query the current stack size using pthread_getthrds_np
-//
-// ! do not change anything here unless you know what you are doing !
-static void query_stack_dimensions(address* p_stack_base, size_t* p_stack_size) {
-
+// Function to query the current stack size using pthread_getthrds_np.
+static bool query_stack_dimensions(address* p_stack_base, size_t* p_stack_size) {
// This only works when invoked on a pthread. As we agreed not to use
- // primordial threads anyway, I assert here
+ // primordial threads anyway, I assert here.
guarantee(!os::Aix::is_primordial_thread(), "not allowed on the primordial thread");
- // information about this api can be found (a) in the pthread.h header and
+ // Information about this api can be found (a) in the pthread.h header and
// (b) in http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/pthread_getthrds_np.htm
//
// The use of this API to find out the current stack is kind of undefined.
@@ -4513,57 +4365,72 @@
pthread_t tid = pthread_self();
struct __pthrdsinfo pinfo;
- char dummy[1]; // we only need this to satisfy the api and to not get E
+ char dummy[1]; // We only need this to satisfy the api and to not get E.
int dummy_size = sizeof(dummy);
memset(&pinfo, 0, sizeof(pinfo));
- const int rc = pthread_getthrds_np (&tid, PTHRDSINFO_QUERY_ALL, &pinfo,
- sizeof(pinfo), dummy, &dummy_size);
+ const int rc = pthread_getthrds_np(&tid, PTHRDSINFO_QUERY_ALL, &pinfo,
+ sizeof(pinfo), dummy, &dummy_size);
if (rc != 0) {
- fprintf(stderr, "pthread_getthrds_np failed (%d)\n", rc);
- guarantee(0, "pthread_getthrds_np failed");
- }
-
- guarantee(pinfo.__pi_stackend, "returned stack base invalid");
-
- // the following can happen when invoking pthread_getthrds_np on a pthread running on a user provided stack
- // (when handing down a stack to pthread create, see pthread_attr_setstackaddr).
+ assert0(false);
+ trcVerbose("pthread_getthrds_np failed (%d)", rc);
+ return false;
+ }
+ guarantee0(pinfo.__pi_stackend);
+
+ // The following can happen when invoking pthread_getthrds_np on a pthread running
+ // on a user provided stack (when handing down a stack to pthread create, see
+ // pthread_attr_setstackaddr).
// Not sure what to do here - I feel inclined to forbid this use case completely.
- guarantee(pinfo.__pi_stacksize, "returned stack size invalid");
-
- // On AIX, stacks are not necessarily page aligned so round the base and size accordingly
+ guarantee0(pinfo.__pi_stacksize);
+
+ // Note: the pthread stack on AIX seems to look like this:
+ //
+ // --------------------- real base ? at page border ?
+ //
+ // pthread internal data, like ~2K, see also
+ // http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.prftungd/doc/prftungd/thread_supp_tun_params.htm
+ //
+ // --------------------- __pi_stackend - not page aligned, (xxxxF890)
+ //
+ // stack
+ // ....
+ //
+ // stack
+ //
+ // --------------------- __pi_stackend - __pi_stacksize
+ //
+ // padding due to AIX guard pages (?) see AIXTHREAD_GUARDPAGES
+ // --------------------- __pi_stackaddr (page aligned if AIXTHREAD_GUARDPAGES > 0)
+ //
+ // AIX guard pages (?)
+ //
+
+ // So, the safe thing to do is to use the area from __pi_stackend to __pi_stackaddr;
+ // __pi_stackend however is almost never page aligned.
+ //
+
if (p_stack_base) {
- (*p_stack_base) = (address) align_size_up((intptr_t)pinfo.__pi_stackend, os::Aix::stack_page_size());
+ (*p_stack_base) = (address) (pinfo.__pi_stackend);
}
if (p_stack_size) {
- (*p_stack_size) = pinfo.__pi_stacksize - os::Aix::stack_page_size();
- }
-
-#ifndef PRODUCT
- if (Verbose) {
- fprintf(stderr,
- "query_stack_dimensions() -> real stack_base=" INTPTR_FORMAT ", real stack_addr=" INTPTR_FORMAT
- ", real stack_size=" INTPTR_FORMAT
- ", stack_base=" INTPTR_FORMAT ", stack_size=" INTPTR_FORMAT "\n",
- (intptr_t)pinfo.__pi_stackend, (intptr_t)pinfo.__pi_stackaddr, pinfo.__pi_stacksize,
- (intptr_t)align_size_up((intptr_t)pinfo.__pi_stackend, os::Aix::stack_page_size()),
- pinfo.__pi_stacksize - os::Aix::stack_page_size());
- }
-#endif
-
-} // end query_stack_dimensions
-
-// get the current stack base from the OS (actually, the pthread library)
+ (*p_stack_size) = pinfo.__pi_stackend - pinfo.__pi_stackaddr;
+ }
+
+ return true;
+}
+
+// Get the current stack base from the OS (actually, the pthread library).
address os::current_stack_base() {
address p;
query_stack_dimensions(&p, 0);
return p;
}
-// get the current stack size from the OS (actually, the pthread library)
+// Get the current stack size from the OS (actually, the pthread library).
size_t os::current_stack_size() {
size_t s;
query_stack_dimensions(0, &s);
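
For illustration only (the concrete numbers are assumptions, not part of the patch): with Aix::vm_default_page_size() = 8K, vm_page_size() = 64K and StackShadowPages = 20, the rescaling added to os::init() above computes round_to(20 * 8K, 64K) / 64K = 192K / 64K = 3, so three 64K shadow pages cover at least the footprint previously expressed as twenty 8K pages, while StackYellowPages and StackRedPages each shrink to a single 64K page.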
--- a/hotspot/src/os/aix/vm/os_aix.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/os_aix.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,9 +35,9 @@
friend class os;
// For signal-chaining
- // highest so far (AIX 5.2) is SIGSAK (63)
+ // highest so far (AIX 5.2 - 6.1) is SIGSAK (63)
#define MAXSIGNUM 63
- // length of strings included in the libperfstat structures
+ // Length of strings included in the libperfstat structures.
#define IDENTIFIER_LENGTH 64
static struct sigaction sigact[MAXSIGNUM]; // saved preinstalled sigactions
@@ -111,22 +111,6 @@
// (should be LDR_CNTRL DATAPSIZE because stack is allocated on heap by pthread lib)
static int _stack_page_size;
- // Default shm page size. Read: what page size shared memory will be backed
- // with if no page size was set explicitly using shmctl(SHM_PAGESIZE).
- // Should be LDR_CNTRL SHMPSIZE.
- static size_t _shm_default_page_size;
-
- // True if sys V shm can be used with 64K pages dynamically.
- // (via shmctl(.. SHM_PAGESIZE..). Should be true for AIX 53 and
- // newer / PASE V6R1 and newer. (0 or 1, -1 if not initialized)
- static int _can_use_64K_pages;
-
- // True if sys V shm can be used with 16M pages dynamically.
- // (via shmctl(.. SHM_PAGESIZE..). Only true on AIX 5.3 and
- // newer, if the system was set up to use 16M pages and the
- // jvm has enough user rights. (0 or 1, -1 if not initialized)
- static int _can_use_16M_pages;
-
static julong available_memory();
static julong physical_memory() { return _physical_memory; }
static void initialize_system_info();
@@ -135,10 +119,6 @@
// one of Aix::on_pase(), Aix::os_version().
static void initialize_os_info();
- static int commit_memory_impl(char* addr, size_t bytes, bool exec);
- static int commit_memory_impl(char* addr, size_t bytes,
- size_t alignment_hint, bool exec);
-
  // Scan environment for important settings which might affect the
// VM. Trace out settings. Warn about invalid settings and/or
// correct them.
@@ -146,10 +126,6 @@
  // Must run after os::Aix::initialize_os_info().
static void scan_environment();
- // Retrieve information about multipage size support. Will initialize
- // _page_size, _stack_page_size, _can_use_64K_pages/_can_use_16M_pages
- static void query_multipage_support();
-
// Initialize libo4 (on PASE) and libperfstat (on AIX). Call this
// before relying on functions from either lib, e.g. Aix::get_meminfo().
static void initialize_libo4();
@@ -187,27 +163,8 @@
return _stack_page_size;
}
- // default shm page size. Read: what page size shared memory
- // will be backed with if no page size was set explicitly using shmctl(SHM_PAGESIZE).
- // Should be LDR_CNTRL SHMPSIZE.
- static int shm_default_page_size(void) {
- assert(_shm_default_page_size != -1, "not initialized");
- return _shm_default_page_size;
- }
-
- // Return true if sys V shm can be used with 64K pages dynamically
- // (via shmctl(.. SHM_PAGESIZE..).
- static bool can_use_64K_pages () {
- assert(_can_use_64K_pages != -1, "not initialized");
- return _can_use_64K_pages == 1 ? true : false;
- }
-
- // Return true if sys V shm can be used with 16M pages dynamically.
- // (via shmctl(.. SHM_PAGESIZE..).
- static bool can_use_16M_pages () {
- assert(_can_use_16M_pages != -1, "not initialized");
- return _can_use_16M_pages == 1 ? true : false;
- }
+  // This is used to scale stack space (guard pages etc.). The name is somewhat misleading.
+ static int vm_default_page_size(void ) { return 8*K; }
static address ucontext_get_pc(const ucontext_t* uc);
static intptr_t* ucontext_get_sp(ucontext_t* uc);
@@ -269,6 +226,11 @@
return _os_version;
}
+ // Convenience method: returns true if running on PASE V5R4 or older.
+ static bool on_pase_V5R4_or_older() {
+ return on_pase() && os_version() <= 0x0504;
+ }
+
// Convenience method: returns true if running on AIX 5.3 or older.
static bool on_aix_53_or_older() {
return on_aix() && os_version() <= 0x0503;
--- a/hotspot/src/os/aix/vm/os_aix.inline.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/os_aix.inline.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,7 @@
return pthread_getspecific((pthread_key_t)index);
}
-// File names are case-sensitive on windows only
+// File names are case-sensitive on windows only.
inline int os::file_name_strcmp(const char* s1, const char* s2) {
return strcmp(s1, s2);
}
@@ -53,18 +53,19 @@
return true;
}
+// Whether or not calling code should/can commit/uncommit stack pages
+// before guarding them. Answer for AIX is definitely no, because memory
+// is automatically committed on touch.
inline bool os::allocate_stack_guard_pages() {
assert(uses_stack_guard_pages(), "sanity check");
- return true;
+ return false;
}
-
// On Aix, reservations are made on a page by page basis, nothing to do.
inline void os::pd_split_reserved_memory(char *base, size_t size,
size_t split, bool realloc) {
}
-
// Bang the shadow pages if they need to be touched to be mapped.
inline void os::bang_stack_shadow_pages() {
}
@@ -75,15 +76,13 @@
inline const int os::default_file_open_flags() { return 0;}
-inline DIR* os::opendir(const char* dirname)
-{
+inline DIR* os::opendir(const char* dirname) {
assert(dirname != NULL, "just checking");
return ::opendir(dirname);
}
-inline int os::readdir_buf_size(const char *path)
-{
- // according to aix sys/limits, NAME_MAX must be retrieved at runtime. */
+inline int os::readdir_buf_size(const char *path) {
+ // According to aix sys/limits, NAME_MAX must be retrieved at runtime.
const long my_NAME_MAX = pathconf(path, _PC_NAME_MAX);
return my_NAME_MAX + sizeof(dirent) + 1;
}
@@ -104,8 +103,7 @@
return ::ftruncate64(fd, length);
}
-inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
-{
+inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf) {
dirent* p;
int status;
assert(dirp != NULL, "just checking");
@@ -174,11 +172,11 @@
RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, flags));
}
-inline int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
+inline int os::raw_send(int fd, char *buf, size_t nBytes, uint flags) {
return os::send(fd, buf, nBytes, flags);
}
-inline int os::connect(int fd, struct sockaddr* him, socklen_t len) {
+inline int os::connect(int fd, struct sockaddr *him, socklen_t len) {
RESTARTABLE_RETURN_INT(::connect(fd, him, len));
}
--- a/hotspot/src/os/aix/vm/perfMemory_aix.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/perfMemory_aix.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -797,7 +797,7 @@
// Close the directory and reset the current working directory.
close_directory_secure_cwd(dirp, saved_cwd_fd);
- FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
+ FREE_C_HEAP_ARRAY(char, dbuf);
}
// Make the user specific temporary directory. Returns true if
@@ -1164,9 +1164,9 @@
// store file, we don't follow them when attaching either.
//
if (!is_directory_secure(dirname)) {
- FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
+ FREE_C_HEAP_ARRAY(char, dirname);
if (luser != user) {
- FREE_C_HEAP_ARRAY(char, luser, mtInternal);
+ FREE_C_HEAP_ARRAY(char, luser);
}
THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
"Process not found");
--- a/hotspot/src/os/aix/vm/porting_aix.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/os/aix/vm/porting_aix.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,18 @@
*
*/
+#ifndef OS_AIX_VM_PORTING_AIX_HPP
+#define OS_AIX_VM_PORTING_AIX_HPP
+
#include <stddef.h>
+// PPC port only:
+#define assert0(b) assert( (b), "" )
+#define guarantee0(b) assert( (b), "" )
+template <class T1, class T2> bool is_aligned_to(T1 what, T2 alignment) {
+ return ( ((uintx)(what)) & (((uintx)(alignment)) - 1) ) == 0 ? true : false;
+}
+
// Header file to contain porting-relevant code which does not have a
// home anywhere else and which can not go into os_<platform>.h because
// that header is included inside the os class definition, hence all
@@ -79,3 +89,62 @@
const struct tbtable** p_tb, // [out] optional: ptr to traceback table to get further information
char* p_errmsg, size_t errmsglen // [out] optional: user provided buffer for error messages
);
+
+// -------------------------------------------------------------------------
+
+// A simple critical section which shall be based upon OS critical
+// sections (a CRITICAL_SECTION on Windows, a Posix mutex elsewhere) and nothing else.
+
+#include <pthread.h>
+
+namespace MiscUtils {
+ typedef pthread_mutex_t critsect_t;
+
+ inline void init_critsect(MiscUtils::critsect_t* cs) {
+ pthread_mutex_init(cs, NULL);
+ }
+ inline void free_critsect(MiscUtils::critsect_t* cs) {
+ pthread_mutex_destroy(cs);
+ }
+ inline void enter_critsect(MiscUtils::critsect_t* cs) {
+ pthread_mutex_lock(cs);
+ }
+ inline void leave_critsect(MiscUtils::critsect_t* cs) {
+ pthread_mutex_unlock(cs);
+ }
+
+  // Need to wrap this in an object because we need to dynamically initialize the
+  // critical section (because of Windows, where there is no way to initialize
+  // a CRITICAL_SECTION statically; on Unix, we could use
+  // PTHREAD_MUTEX_INITIALIZER).
+
+ // Note: The critical section does NOT get cleaned up in the destructor. That is
+ // by design: the CritSect class is only ever used as global objects whose
+ // lifetime spans the whole VM life; in that context we don't want the lock to
+ // be cleaned up when global C++ objects are destroyed, but to continue to work
+ // correctly right to the very end of the process life.
+ class CritSect {
+ critsect_t _cs;
+ public:
+ CritSect() { init_critsect(&_cs); }
+ //~CritSect() { free_critsect(&_cs); }
+ void enter() { enter_critsect(&_cs); }
+ void leave() { leave_critsect(&_cs); }
+ };
+
+ class AutoCritSect {
+ CritSect* const _pcsobj;
+ public:
+ AutoCritSect(CritSect* pcsobj)
+ : _pcsobj(pcsobj)
+ {
+ _pcsobj->enter();
+ }
+ ~AutoCritSect() {
+ _pcsobj->leave();
+ }
+ };
+
+}
+
+#endif // OS_AIX_VM_PORTING_AIX_HPP
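
A minimal usage sketch for the MiscUtils critical-section helpers introduced above; the lock object and the function are hypothetical names chosen for illustration, not code from the patch:

    #include "porting_aix.hpp"

    // Global lock; by design it is never destroyed (see the CritSect comment above).
    static MiscUtils::CritSect g_table_lock;

    static void update_shared_table() {
      // Enters the pthread mutex in the constructor, leaves it in the destructor.
      MiscUtils::AutoCritSect guard(&g_table_lock);
      // ... mutate the shared table while the lock is held ...
    }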
--- a/hotspot/src/share/vm/adlc/adlparse.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/adlparse.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -800,6 +800,7 @@
}
if (strcmp(token,"reg_def")==0) { reg_def_parse(); }
else if (strcmp(token,"reg_class")==0) { reg_class_parse(); }
+ else if (strcmp(token, "reg_class_dynamic") == 0) { reg_class_dynamic_parse(); }
else if (strcmp(token,"alloc_class")==0) { alloc_class_parse(); }
else if (strcmp(token,"#define")==0) { preproc_define(); }
else { parse_err(SYNERR, "bad token %s inside register block.\n", token); break; }
@@ -2323,11 +2324,12 @@
// Debug Stuff
if (_AD._adl_debug >1) fprintf(stderr,"Register Class: %s\n", cname);
- RegClass *reg_class = _AD._register->addRegClass(cname);
-
- // Collect registers in class
skipws();
if (_curchar == '(') {
+ // A register list is defined for the register class.
+ // Collect registers into a generic RegClass register class.
+ RegClass* reg_class = _AD._register->addRegClass<RegClass>(cname);
+
next_char(); // Skip '('
skipws();
while (_curchar != ')') {
@@ -2352,12 +2354,15 @@
}
next_char(); // Skip closing ')'
} else if (_curchar == '%') {
+ // A code snippet is defined for the register class.
+ // Collect the code snippet into a CodeSnippetRegClass register class.
+ CodeSnippetRegClass* reg_class = _AD._register->addRegClass<CodeSnippetRegClass>(cname);
char *code = find_cpp_block("reg class");
if (code == NULL) {
parse_err(SYNERR, "missing code declaration for reg class.\n");
return;
}
- reg_class->_user_defined = code;
+ reg_class->set_code_snippet(code);
return;
}
@@ -2374,6 +2379,87 @@
return;
}
+//------------------------------reg_class_dynamic_parse------------------------
+void ADLParser::reg_class_dynamic_parse(void) {
+ char *cname; // Name of dynamic register class being defined
+
+ // Get register class name
+ skipws();
+ cname = get_ident();
+ if (cname == NULL) {
+ parse_err(SYNERR, "missing dynamic register class name after 'reg_class_dynamic'\n");
+ return;
+ }
+
+ if (_AD._adl_debug > 1) {
+ fprintf(stdout, "Dynamic Register Class: %s\n", cname);
+ }
+
+ skipws();
+ if (_curchar != '(') {
+ parse_err(SYNERR, "missing '(' at the beginning of reg_class_dynamic definition\n");
+ return;
+ }
+ next_char();
+ skipws();
+
+ // Collect two register classes and the C++ code representing the condition code used to
+ // select between the two classes into a ConditionalRegClass register class.
+ ConditionalRegClass* reg_class = _AD._register->addRegClass<ConditionalRegClass>(cname);
+ int i;
+ for (i = 0; i < 2; i++) {
+ char* name = get_ident();
+ if (name == NULL) {
+ parse_err(SYNERR, "missing class identifier inside reg_class_dynamic list.\n");
+ return;
+ }
+ RegClass* rc = _AD._register->getRegClass(name);
+ if (rc == NULL) {
+ parse_err(SEMERR, "unknown identifier %s inside reg_class_dynamic list.\n", name);
+ } else {
+ reg_class->set_rclass_at_index(i, rc);
+ }
+
+ skipws();
+ if (_curchar == ',') {
+ next_char();
+ skipws();
+ } else {
+ parse_err(SYNERR, "missing separator ',' inside reg_class_dynamic list.\n");
+ }
+ }
+
+ // Collect the condition code.
+ skipws();
+ if (_curchar == '%') {
+ char* code = find_cpp_block("reg class dynamic");
+ if (code == NULL) {
+ parse_err(SYNERR, "missing code declaration for reg_class_dynamic.\n");
+ return;
+ }
+ reg_class->set_condition_code(code);
+ } else {
+ parse_err(SYNERR, "missing %% at the beginning of code block in reg_class_dynamic definition\n");
+ return;
+ }
+
+ skipws();
+ if (_curchar != ')') {
+ parse_err(SYNERR, "missing ')' at the end of reg_class_dynamic definition\n");
+ return;
+ }
+ next_char();
+
+ skipws();
+ if (_curchar != ';') {
+ parse_err(SYNERR, "missing ';' at the end of reg_class_dynamic definition.\n");
+ return;
+ }
+ next_char(); // Skip trailing ';'
+
+ return;
+}
+
//------------------------------alloc_class_parse------------------------------
void ADLParser::alloc_class_parse(void) {
char *name; // Name of allocation class being defined
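
For reference, the .ad input accepted by reg_class_dynamic_parse() above follows the example documented later in this patch (formsopt.hpp):

    reg_class_dynamic actual_dflt_reg(dflt_reg, low_reg,
                                      %{ VM_Version::has_vfp3_32() }%
                                      );

Both dflt_reg and low_reg must already be defined register classes (otherwise getRegClass() returns NULL and a SEMERR is reported), and the %{ ... }% block holds the C++ condition used to choose between them.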
--- a/hotspot/src/share/vm/adlc/adlparse.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/adlparse.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -53,6 +53,8 @@
// ***** Register Section *****
class RegDef;
class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
class AllocClass;
class ResourceForm;
// ***** Pipeline Section *****
@@ -125,6 +127,7 @@
// Parse components of the register section
void reg_def_parse(void); // Parse register definition
void reg_class_parse(void); // Parse register class definition
+ void reg_class_dynamic_parse(void); // Parse dynamic register class definition
void alloc_class_parse(void); // Parse allocation class definition
// Parse components of the definition section
--- a/hotspot/src/share/vm/adlc/archDesc.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/archDesc.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -908,7 +908,7 @@
void ArchDesc::set_stack_or_reg(const char *reg_class_name) {
if( _register ) {
RegClass *reg_class = _register->getRegClass(reg_class_name);
- reg_class->_stack_or_reg = true;
+ reg_class->set_stack_version(true);
}
}
--- a/hotspot/src/share/vm/adlc/forms.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/forms.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -68,6 +68,8 @@
class InsEncode;
class RegDef;
class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
class AllocClass;
class ResourceForm;
class PipeClassForm;
--- a/hotspot/src/share/vm/adlc/formsopt.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/formsopt.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -47,13 +47,19 @@
}
// record a new register class
-RegClass *RegisterForm::addRegClass(const char *className) {
- RegClass *regClass = new RegClass(className);
+template <typename T>
+T* RegisterForm::addRegClass(const char* className) {
+ T* regClass = new T(className);
_rclasses.addName(className);
- _regClass.Insert(className,regClass);
+ _regClass.Insert(className, regClass);
return regClass;
}
+// Explicit instantiation for all supported register classes.
+template RegClass* RegisterForm::addRegClass<RegClass>(const char* className);
+template CodeSnippetRegClass* RegisterForm::addRegClass<CodeSnippetRegClass>(const char* className);
+template ConditionalRegClass* RegisterForm::addRegClass<ConditionalRegClass>(const char* className);
+
// record a new register class
AllocClass *RegisterForm::addAllocClass(char *className) {
AllocClass *allocClass = new AllocClass(className);
@@ -67,9 +73,9 @@
void RegisterForm::addSpillRegClass() {
// Stack slots start at the next available even register number.
_reg_ctr = (_reg_ctr+7) & ~7;
- const char *rc_name = "stack_slots";
- RegClass *reg_class = new RegClass(rc_name);
- reg_class->_stack_or_reg = true;
+ const char *rc_name = "stack_slots";
+ RegClass* reg_class = new RegClass(rc_name);
+ reg_class->set_stack_version(true);
_rclasses.addName(rc_name);
_regClass.Insert(rc_name,reg_class);
}
@@ -224,9 +230,11 @@
//------------------------------RegClass---------------------------------------
// Construct a register class into which registers will be inserted
-RegClass::RegClass(const char *classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr,hashstr, Form::arena),
- _user_defined(NULL)
-{
+RegClass::RegClass(const char* classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr, hashstr, Form::arena) {
+}
+
+RegClass::~RegClass() {
+ delete _classid;
}
// record a register in this class
@@ -305,6 +313,91 @@
fprintf(fp,"--- done with entries for reg_class %s\n\n",_classid);
}
+void RegClass::declare_register_masks(FILE* fp) {
+ const char* prefix = "";
+ const char* rc_name_to_upper = toUpper(_classid);
+ fprintf(fp, "extern const RegMask _%s%s_mask;\n", prefix, rc_name_to_upper);
+ fprintf(fp, "inline const RegMask &%s%s_mask() { return _%s%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
+ if (_stack_or_reg) {
+ fprintf(fp, "extern const RegMask _%sSTACK_OR_%s_mask;\n", prefix, rc_name_to_upper);
+ fprintf(fp, "inline const RegMask &%sSTACK_OR_%s_mask() { return _%sSTACK_OR_%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
+ }
+ delete[] rc_name_to_upper;
+}
+
+void RegClass::build_register_masks(FILE* fp) {
+ int len = RegisterForm::RegMask_Size();
+ const char *prefix = "";
+ const char* rc_name_to_upper = toUpper(_classid);
+ fprintf(fp, "const RegMask _%s%s_mask(", prefix, rc_name_to_upper);
+
+ int i;
+ for(i = 0; i < len - 1; i++) {
+ fprintf(fp," 0x%x,", regs_in_word(i, false));
+ }
+ fprintf(fp," 0x%x );\n", regs_in_word(i, false));
+
+ if (_stack_or_reg) {
+ fprintf(fp, "const RegMask _%sSTACK_OR_%s_mask(", prefix, rc_name_to_upper);
+ for(i = 0; i < len - 1; i++) {
+ fprintf(fp," 0x%x,", regs_in_word(i, true));
+ }
+ fprintf(fp," 0x%x );\n", regs_in_word(i, true));
+ }
+ delete[] rc_name_to_upper;
+}
+
+//------------------------------CodeSnippetRegClass---------------------------
+CodeSnippetRegClass::CodeSnippetRegClass(const char* classid) : RegClass(classid), _code_snippet(NULL) {
+}
+
+CodeSnippetRegClass::~CodeSnippetRegClass() {
+ delete _code_snippet;
+}
+
+void CodeSnippetRegClass::declare_register_masks(FILE* fp) {
+ const char* prefix = "";
+ const char* rc_name_to_upper = toUpper(_classid);
+ fprintf(fp, "inline const RegMask &%s%s_mask() { %s }\n", prefix, rc_name_to_upper, _code_snippet);
+ delete[] rc_name_to_upper;
+}
+
+//------------------------------ConditionalRegClass---------------------------
+ConditionalRegClass::ConditionalRegClass(const char *classid) : RegClass(classid), _condition_code(NULL) {
+}
+
+ConditionalRegClass::~ConditionalRegClass() {
+ delete _condition_code;
+}
+
+void ConditionalRegClass::declare_register_masks(FILE* fp) {
+ const char* prefix = "";
+ const char* rc_name_to_upper = toUpper(_classid);
+ const char* rclass_0_to_upper = toUpper(_rclasses[0]->_classid);
+ const char* rclass_1_to_upper = toUpper(_rclasses[1]->_classid);
+ fprintf(fp, "inline const RegMask &%s%s_mask() {"
+ " return (%s) ?"
+ " %s%s_mask() :"
+ " %s%s_mask(); }\n",
+ prefix, rc_name_to_upper,
+ _condition_code,
+ prefix, rclass_0_to_upper,
+ prefix, rclass_1_to_upper);
+ if (_stack_or_reg) {
+ fprintf(fp, "inline const RegMask &%sSTACK_OR_%s_mask() {"
+ " return (%s) ?"
+ " %sSTACK_OR_%s_mask() :"
+ " %sSTACK_OR_%s_mask(); }\n",
+ prefix, rc_name_to_upper,
+ _condition_code,
+ prefix, rclass_0_to_upper,
+ prefix, rclass_1_to_upper);
+ }
+ delete[] rc_name_to_upper;
+ delete[] rclass_0_to_upper;
+ delete[] rclass_1_to_upper;
+ return;
+}
//------------------------------AllocClass-------------------------------------
AllocClass::AllocClass(char *classid) : _classid(classid), _regDef(cmpstr,hashstr, Form::arena) {
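
To make the generated output of the new virtual methods above concrete (the class name ptr is assumed for illustration): for a plain reg_class named ptr, RegClass::declare_register_masks() writes

    extern const RegMask _PTR_mask;
    inline const RegMask &PTR_mask() { return _PTR_mask; }

plus the corresponding _STACK_OR_PTR_mask declarations when the class is cisc-spillable, and RegClass::build_register_masks() writes the matching const RegMask _PTR_mask( 0x..., ... ); definitions built from regs_in_word().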
--- a/hotspot/src/share/vm/adlc/formsopt.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/formsopt.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -60,6 +60,8 @@
class InsEncode;
class RegDef;
class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
class AllocClass;
class ResourceForm;
class PipeClassForm;
@@ -98,7 +100,8 @@
void addRegDef(char *regName, char *callingConv, char *c_conv,
char * idealtype, char *encoding, char* concreteName);
- RegClass *addRegClass(const char *className);
+ template<typename T> T* addRegClass(const char* className);
+
AllocClass *addAllocClass(char *allocName);
void addSpillRegClass();
@@ -154,17 +157,28 @@
};
//------------------------------RegClass---------------------------------------
+// Generic register class. This register class is the internal representation
+// for the following .ad file format:
+//
+// reg_class ptr(RAX, RBX, ...);
+//
+// where ptr is the name of the register class and RAX and RBX are registers.
+//
+// This register class allows registers to be spilled onto the stack. Spilling
+// is allowed if the field _stack_or_reg is true.
class RegClass : public Form {
public:
// Public Data
const char *_classid; // Name of class
NameList _regDefs; // List of registers in class
Dict _regDef; // Dictionary of registers in class
+protected:
bool _stack_or_reg; // Allowed on any stack slot
- char* _user_defined;
+public:
// Public Methods
RegClass(const char *classid);// Constructor
+ virtual ~RegClass();
void addReg(RegDef *regDef); // Add a register to this class
@@ -183,6 +197,115 @@
void dump(); // Debug printer
void output(FILE *fp); // Write info to output files
+
+ virtual bool has_stack_version() {
+ return _stack_or_reg;
+ }
+ virtual void set_stack_version(bool flag) {
+ _stack_or_reg = flag;
+ }
+
+ virtual void declare_register_masks(FILE* fp);
+ virtual void build_register_masks(FILE* fp);
+};
+
+//------------------------------CodeSnippetRegClass----------------------------
+// Register class that has a user-defined C++ code snippet attached to it
+// to determine at runtime which register class to use. This register class is
+// the internal representation for the following .ad file format:
+//
+// reg_class actual_dflt_reg %{
+// if (VM_Version::has_vfp3_32()) {
+// return DFLT_REG_mask();
+// } else {
+// return DFLT_LOW_REG_mask();
+// }
+// %}
+//
+// where DFLT_REG_mask() and DFLT_LOW_REG_mask() are the internal names of the
+// masks of register classes dflt_reg and dflt_low_reg.
+//
+// The attached code snippet can also select between more than two register classes.
+// However, this register class can only be used if the register class is not
+// cisc-spillable (i.e., the registers of this class are not allowed on the stack,
+// which is equivalent to _stack_or_reg being false).
+class CodeSnippetRegClass : public RegClass {
+protected:
+ char* _code_snippet;
+public:
+ CodeSnippetRegClass(const char* classid);// Constructor
+ ~CodeSnippetRegClass();
+
+ void set_code_snippet(char* code) {
+ _code_snippet = code;
+ }
+ char* code_snippet() {
+ return _code_snippet;
+ }
+ void set_stack_version(bool flag) {
+ assert(false, "User defined register classes are not allowed to spill to the stack.");
+ }
+ void declare_register_masks(FILE* fp);
+ void build_register_masks(FILE* fp) {
+ // We do not need to generate register masks because we select at runtime
+ // between register masks generated for other register classes.
+ return;
+ }
+};
+
+//------------------------------ConditionalRegClass----------------------------
+// Register class that has two register classes and a runtime condition attached
+// to it. The condition is evaluated at runtime and one of the two attached
+// register classes is selected. This register class is the internal
+// representation for the following .ad format:
+//
+// reg_class_dynamic actual_dflt_reg(dflt_reg, low_reg,
+// %{ VM_Version::has_vfp3_32() }%
+// );
+//
+// This example is equivalent to the example used with the CodeSnippetRegClass
+// register class. A ConditionalRegClass also works if a register class is cisc-spillable
+// (i.e., _stack_or_reg is true), but it can only select between two register classes.
+class ConditionalRegClass : public RegClass {
+protected:
+ // reference to condition code
+ char* _condition_code; // C++ condition code to dynamically determine which register class to use.
+
+ // Example syntax (equivalent to previous example):
+ //
+                          // reg_class_dynamic actual_dflt_reg(dflt_reg, low_reg,
+ // %{ VM_Version::has_vfp3_32() }%
+ // );
+ // reference to conditional register classes
+ RegClass* _rclasses[2]; // 0 is the register class selected if the condition code returns true
+ // 1 is the register class selected if the condition code returns false
+public:
+ ConditionalRegClass(const char* classid);// Constructor
+ ~ConditionalRegClass();
+
+ virtual void set_stack_version(bool flag) {
+ RegClass::set_stack_version(flag);
+ assert((_rclasses[0] != NULL), "Register class NULL for condition code == true");
+ assert((_rclasses[1] != NULL), "Register class NULL for condition code == false");
+ _rclasses[0]->set_stack_version(flag);
+ _rclasses[1]->set_stack_version(flag);
+ }
+ void declare_register_masks(FILE* fp);
+ void build_register_masks(FILE* fp) {
+ // We do not need to generate register masks because we select at runtime
+ // between register masks generated for other register classes.
+ return;
+ }
+ void set_rclass_at_index(int index, RegClass* rclass) {
+ assert((0 <= index && index < 2), "Condition code can select only between two register classes");
+ _rclasses[index] = rclass;
+ }
+ void set_condition_code(char* code) {
+ _condition_code = code;
+ }
+ char* condition_code() {
+ return _condition_code;
+ }
};
//------------------------------AllocClass-------------------------------------
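
For the reg_class_dynamic example shown in the ConditionalRegClass comment above, ConditionalRegClass::declare_register_masks() (see formsopt.cpp earlier in this patch) would emit an accessor of roughly this form:

    inline const RegMask &ACTUAL_DFLT_REG_mask() {
      return (VM_Version::has_vfp3_32()) ? DFLT_REG_mask() : LOW_REG_mask();
    }

No new mask data is generated for the dynamic class itself; the condition merely selects between the masks already built for dflt_reg and low_reg.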
--- a/hotspot/src/share/vm/adlc/formssel.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/formssel.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -4043,6 +4043,13 @@
strcmp(opType,"ReplicateL")==0 ||
strcmp(opType,"ReplicateF")==0 ||
strcmp(opType,"ReplicateD")==0 ||
+ strcmp(opType,"AddReductionVI")==0 ||
+ strcmp(opType,"AddReductionVL")==0 ||
+ strcmp(opType,"AddReductionVF")==0 ||
+ strcmp(opType,"AddReductionVD")==0 ||
+ strcmp(opType,"MulReductionVI")==0 ||
+ strcmp(opType,"MulReductionVF")==0 ||
+ strcmp(opType,"MulReductionVD")==0 ||
0 /* 0 to line up columns nicely */ )
return 1;
}
@@ -4135,6 +4142,10 @@
"MulVS","MulVI","MulVF","MulVD",
"DivVF","DivVD",
"AndV" ,"XorV" ,"OrV",
+ "AddReductionVI", "AddReductionVL",
+ "AddReductionVF", "AddReductionVD",
+ "MulReductionVI",
+ "MulReductionVF", "MulReductionVD",
"LShiftCntV","RShiftCntV",
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
--- a/hotspot/src/share/vm/adlc/formssel.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/formssel.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -59,6 +59,8 @@
class InsEncode;
class RegDef;
class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
class AllocClass;
class ResourceForm;
class PipeDesc;
--- a/hotspot/src/share/vm/adlc/output_c.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/adlc/output_c.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -138,26 +138,9 @@
fprintf(fp_hpp,"// Register masks, one for each register class.\n");
_register->_rclasses.reset();
for (rc_name = NULL; (rc_name = _register->_rclasses.iter()) != NULL;) {
- const char *prefix = "";
RegClass *reg_class = _register->getRegClass(rc_name);
assert(reg_class, "Using an undefined register class");
-
- const char* rc_name_to_upper = toUpper(rc_name);
-
- if (reg_class->_user_defined == NULL) {
- fprintf(fp_hpp, "extern const RegMask _%s%s_mask;\n", prefix, rc_name_to_upper);
- fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { return _%s%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
- } else {
- fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { %s }\n", prefix, rc_name_to_upper, reg_class->_user_defined);
- }
-
- if (reg_class->_stack_or_reg) {
- assert(reg_class->_user_defined == NULL, "no user defined reg class here");
- fprintf(fp_hpp, "extern const RegMask _%sSTACK_OR_%s_mask;\n", prefix, rc_name_to_upper);
- fprintf(fp_hpp, "inline const RegMask &%sSTACK_OR_%s_mask() { return _%sSTACK_OR_%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
- }
- delete[] rc_name_to_upper;
-
+ reg_class->declare_register_masks(fp_hpp);
}
}
}
@@ -173,35 +156,9 @@
fprintf(fp_cpp,"// Register masks, one for each register class.\n");
_register->_rclasses.reset();
for (rc_name = NULL; (rc_name = _register->_rclasses.iter()) != NULL;) {
- const char *prefix = "";
RegClass *reg_class = _register->getRegClass(rc_name);
assert(reg_class, "Using an undefined register class");
-
- if (reg_class->_user_defined != NULL) {
- continue;
- }
-
- int len = RegisterForm::RegMask_Size();
- const char* rc_name_to_upper = toUpper(rc_name);
- fprintf(fp_cpp, "const RegMask _%s%s_mask(", prefix, rc_name_to_upper);
-
- {
- int i;
- for(i = 0; i < len - 1; i++) {
- fprintf(fp_cpp," 0x%x,", reg_class->regs_in_word(i, false));
- }
- fprintf(fp_cpp," 0x%x );\n", reg_class->regs_in_word(i, false));
- }
-
- if (reg_class->_stack_or_reg) {
- int i;
- fprintf(fp_cpp, "const RegMask _%sSTACK_OR_%s_mask(", prefix, rc_name_to_upper);
- for(i = 0; i < len - 1; i++) {
- fprintf(fp_cpp," 0x%x,",reg_class->regs_in_word(i, true));
- }
- fprintf(fp_cpp," 0x%x );\n",reg_class->regs_in_word(i, true));
- }
- delete[] rc_name_to_upper;
+ reg_class->build_register_masks(fp_cpp);
}
}
}
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -3462,6 +3462,24 @@
case vmIntrinsics::_putFloat : return append_unsafe_put_obj(callee, T_FLOAT, false);
case vmIntrinsics::_putDouble : return append_unsafe_put_obj(callee, T_DOUBLE, false);
+ case vmIntrinsics::_getShortUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_SHORT, false) : false;
+ case vmIntrinsics::_getCharUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_CHAR, false) : false;
+ case vmIntrinsics::_getIntUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_INT, false) : false;
+ case vmIntrinsics::_getLongUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_LONG, false) : false;
+
+ case vmIntrinsics::_putShortUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_SHORT, false) : false;
+ case vmIntrinsics::_putCharUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_CHAR, false) : false;
+ case vmIntrinsics::_putIntUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_INT, false) : false;
+ case vmIntrinsics::_putLongUnaligned :
+ return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_LONG, false) : false;
+
case vmIntrinsics::_getObjectVolatile : return append_unsafe_get_obj(callee, T_OBJECT, true);
case vmIntrinsics::_getBooleanVolatile: return append_unsafe_get_obj(callee, T_BOOLEAN, true);
case vmIntrinsics::_getByteVolatile : return append_unsafe_get_obj(callee, T_BYTE, true);
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -868,9 +868,12 @@
\
/* Custom branch frequencies profiling support for JSR292 */ \
do_class(java_lang_invoke_MethodHandleImpl, "java/lang/invoke/MethodHandleImpl") \
- do_intrinsic(_profileBoolean, java_lang_invoke_MethodHandleImpl, profileBoolean_name, profileBoolean_signature, F_S) \
- do_name( profileBoolean_name, "profileBoolean") \
- do_signature(profileBoolean_signature, "(Z[I)Z") \
+ do_intrinsic(_profileBoolean, java_lang_invoke_MethodHandleImpl, profileBoolean_name, profileBoolean_signature, F_S) \
+ do_name( profileBoolean_name, "profileBoolean") \
+ do_signature(profileBoolean_signature, "(Z[I)Z") \
+ do_intrinsic(_isCompileConstant, java_lang_invoke_MethodHandleImpl, isCompileConstant_name, isCompileConstant_signature, F_S) \
+ do_name( isCompileConstant_name, "isCompileConstant") \
+ do_alias( isCompileConstant_signature, object_boolean_signature) \
\
/* unsafe memory references (there are a lot of them...) */ \
do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \
@@ -950,6 +953,20 @@
do_intrinsic(_putFloatVolatile, sun_misc_Unsafe, putFloatVolatile_name, putFloat_signature, F_RN) \
do_intrinsic(_putDoubleVolatile, sun_misc_Unsafe, putDoubleVolatile_name, putDouble_signature, F_RN) \
\
+ do_name(getShortUnaligned_name,"getShortUnaligned") do_name(putShortUnaligned_name,"putShortUnaligned") \
+ do_name(getCharUnaligned_name,"getCharUnaligned") do_name(putCharUnaligned_name,"putCharUnaligned") \
+ do_name(getIntUnaligned_name,"getIntUnaligned") do_name(putIntUnaligned_name,"putIntUnaligned") \
+ do_name(getLongUnaligned_name,"getLongUnaligned") do_name(putLongUnaligned_name,"putLongUnaligned") \
+ \
+ do_intrinsic(_getShortUnaligned, sun_misc_Unsafe, getShortUnaligned_name, getShort_signature, F_R) \
+ do_intrinsic(_getCharUnaligned, sun_misc_Unsafe, getCharUnaligned_name, getChar_signature, F_R) \
+ do_intrinsic(_getIntUnaligned, sun_misc_Unsafe, getIntUnaligned_name, getInt_signature, F_R) \
+ do_intrinsic(_getLongUnaligned, sun_misc_Unsafe, getLongUnaligned_name, getLong_signature, F_R) \
+ do_intrinsic(_putShortUnaligned, sun_misc_Unsafe, putShortUnaligned_name, putShort_signature, F_R) \
+ do_intrinsic(_putCharUnaligned, sun_misc_Unsafe, putCharUnaligned_name, putChar_signature, F_R) \
+ do_intrinsic(_putIntUnaligned, sun_misc_Unsafe, putIntUnaligned_name, putInt_signature, F_R) \
+ do_intrinsic(_putLongUnaligned, sun_misc_Unsafe, putLongUnaligned_name, putLong_signature, F_R) \
+ \
/* %%% these are redundant except perhaps for getAddress, but Unsafe has native methods for them */ \
do_signature(getByte_raw_signature, "(J)B") \
do_signature(putByte_raw_signature, "(JB)V") \
--- a/hotspot/src/share/vm/code/dependencies.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/code/dependencies.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -845,7 +845,13 @@
assert((uint)n <= (uint)_num_participants, "oob");
Method* fm = _found_methods[n];
assert(n == _num_participants || fm != NULL, "proper usage");
- assert(fm == NULL || fm->method_holder() == _participants[n], "sanity");
+ if (fm != NULL && fm->method_holder() != _participants[n]) {
+ // Default methods from interfaces can be added to classes. In
+ // that case the holder of the method is not the class but the
+ // interface where it's defined.
+ assert(fm->is_default_method(), "sanity");
+ return NULL;
+ }
return fm;
}
--- a/hotspot/src/share/vm/code/nmethod.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/code/nmethod.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -504,7 +504,7 @@
basic_lock_owner_sp_offset,
basic_lock_sp_offset, oop_maps);
NOT_PRODUCT(if (nm != NULL) nmethod_stats.note_native_nmethod(nm));
- if (PrintAssembly && nm != NULL) {
+ if ((PrintAssembly || CompilerOracle::should_print(method)) && nm != NULL) {
Disassembler::decode(nm);
}
}
@@ -2837,11 +2837,21 @@
st.print(")");
return st.as_string();
}
+ case relocInfo::runtime_call_type: {
+ stringStream st;
+ st.print("runtime_call");
+ runtime_call_Relocation* r = iter.runtime_call_reloc();
+ address dest = r->destination();
+ CodeBlob* cb = CodeCache::find_blob(dest);
+ if (cb != NULL) {
+ st.print(" %s", cb->name());
+ }
+ return st.as_string();
+ }
case relocInfo::virtual_call_type: return "virtual_call";
case relocInfo::opt_virtual_call_type: return "optimized virtual_call";
case relocInfo::static_call_type: return "static_call";
case relocInfo::static_stub_type: return "static_stub";
- case relocInfo::runtime_call_type: return "runtime_call";
case relocInfo::external_word_type: return "external_word";
case relocInfo::internal_word_type: return "internal_word";
case relocInfo::section_word_type: return "section_word";
--- a/hotspot/src/share/vm/code/pcDesc.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/code/pcDesc.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -54,12 +54,7 @@
for (ScopeDesc* sd = code->scope_desc_at(real_pc(code));
sd != NULL;
sd = sd->sender()) {
- tty->print(" ");
- sd->method()->print_short_name(tty);
- tty->print(" @%d", sd->bci());
- if (sd->should_reexecute())
- tty->print(" reexecute=true");
- tty->cr();
+ sd->print_on(tty);
}
#endif
}
--- a/hotspot/src/share/vm/code/scopeDesc.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/code/scopeDesc.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -157,14 +157,18 @@
#ifndef PRODUCT
void ScopeDesc::print_value_on(outputStream* st) const {
- tty->print(" ");
+ st->print(" ");
method()->print_short_name(st);
int lineno = method()->line_number_from_bci(bci());
if (lineno != -1) {
- st->print_cr("@%d (line %d)", bci(), lineno);
+ st->print("@%d (line %d)", bci(), lineno);
} else {
- st->print_cr("@%d", bci());
+ st->print("@%d", bci());
}
+ if (should_reexecute()) {
+ st->print(" reexecute=true");
+ }
+ st->cr();
}
void ScopeDesc::print_on(outputStream* st) const {
@@ -174,7 +178,7 @@
void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
// header
if (pd != NULL) {
- tty->print_cr("ScopeDesc(pc=" PTR_FORMAT " offset=%x):", pd->real_pc(_code), pd->pc_offset());
+ st->print_cr("ScopeDesc(pc=" PTR_FORMAT " offset=%x):", pd->real_pc(_code), pd->pc_offset());
}
print_value_on(st);
@@ -192,7 +196,7 @@
// locals
{ GrowableArray<ScopeValue*>* l = ((ScopeDesc*) this)->locals();
if (l != NULL) {
- tty->print_cr(" Locals");
+ st->print_cr(" Locals");
for (int index = 0; index < l->length(); index++) {
st->print(" - l%d: ", index);
l->at(index)->print_on(st);
@@ -205,7 +209,7 @@
if (l != NULL) {
st->print_cr(" Expression stack");
for (int index = 0; index < l->length(); index++) {
- st->print(" - @%d: ", index);
+ st->print(" - @%d: ", index);
l->at(index)->print_on(st);
st->cr();
}
@@ -225,12 +229,12 @@
#ifdef COMPILER2
if (DoEscapeAnalysis && is_top() && _objects != NULL) {
- tty->print_cr("Objects");
+ st->print_cr(" Objects");
for (int i = 0; i < _objects->length(); i++) {
ObjectValue* sv = (ObjectValue*) _objects->at(i);
- tty->print(" - %d: ", sv->id());
- sv->print_fields_on(tty);
- tty->cr();
+ st->print(" - %d: ", sv->id());
+ sv->print_fields_on(st);
+ st->cr();
}
}
#endif // COMPILER2
--- a/hotspot/src/share/vm/oops/methodData.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/oops/methodData.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -152,6 +152,7 @@
void BitData::print_data_on(outputStream* st, const char* extra) const {
print_shared(st, "BitData", extra);
+ st->cr();
}
// ==================================================================
--- a/hotspot/src/share/vm/oops/methodData.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/oops/methodData.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -2056,7 +2056,7 @@
// Whole-method sticky bits and flags
enum {
- _trap_hist_limit = 21, // decoupled from Deoptimization::Reason_LIMIT
+ _trap_hist_limit = 22, // decoupled from Deoptimization::Reason_LIMIT
_trap_hist_mask = max_jubyte,
_extra_data_count = 4 // extra DataLayout headers, for trap history
}; // Public flag values
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -324,6 +324,9 @@
develop(bool, SuperWordRTDepCheck, false, \
"Enable runtime dependency checks.") \
\
+ product(bool, SuperWordReductions, true, \
+ "Enable reductions support in superword.") \
+ \
notproduct(bool, TraceSuperWord, false, \
"Trace superword transforms") \
\
--- a/hotspot/src/share/vm/opto/castnode.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/castnode.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -216,16 +216,13 @@
const Type *result = _type;
if( in_type != NULL && my_type != NULL ) {
TypePtr::PTR in_ptr = in_type->ptr();
- if( in_ptr == TypePtr::Null ) {
+ if (in_ptr == TypePtr::Null) {
result = in_type;
- } else if( in_ptr == TypePtr::Constant ) {
- // Casting a constant oop to an interface?
- // (i.e., a String to a Comparable?)
- // Then return the interface.
+ } else if (in_ptr == TypePtr::Constant) {
const TypeOopPtr *jptr = my_type->isa_oopptr();
- assert( jptr, "" );
- result = (jptr->klass()->is_interface() || !in_type->higher_equal(_type))
- ? my_type->cast_to_ptr_type( TypePtr::NotNull )
+ assert(jptr, "");
+ result = !in_type->higher_equal(_type)
+ ? my_type->cast_to_ptr_type(TypePtr::NotNull)
: in_type;
} else {
result = my_type->cast_to_ptr_type( my_type->join_ptr(in_ptr) );
--- a/hotspot/src/share/vm/opto/cfgnode.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -525,13 +525,16 @@
// Cut the backedge input and remove phis since no data paths left.
// We don't cut outputs to other nodes here since we need to put them
// on the worklist.
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ if (in(1)->outcnt() == 1) {
+ igvn->_worklist.push(in(1));
+ }
del_req(1);
cnt = 0;
assert( req() == 1, "no more inputs expected" );
uint max = outcnt();
bool progress = true;
Node *top = phase->C->top();
- PhaseIterGVN *igvn = phase->is_IterGVN();
DUIterator j;
while(progress) {
progress = false;
--- a/hotspot/src/share/vm/opto/cfgnode.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/cfgnode.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -263,6 +263,30 @@
// Size is bigger to hold the probability field. However, _prob does not
// change the semantics so it does not appear in the hash & cmp functions.
virtual uint size_of() const { return sizeof(*this); }
+
+private:
+ ProjNode* range_check_trap_proj(int& flip, Node*& l, Node*& r);
+ ProjNode* range_check_trap_proj() {
+ int flip_test = 0;
+ Node* l = NULL;
+ Node* r = NULL;
+ return range_check_trap_proj(flip_test, l, r);
+ }
+
+ // Helper methods for fold_compares
+ bool cmpi_folds(PhaseIterGVN* igvn);
+ bool is_ctrl_folds(Node* ctrl, PhaseIterGVN* igvn);
+ bool has_shared_region(ProjNode* proj, ProjNode*& success, ProjNode*& fail);
+ bool has_only_uncommon_traps(ProjNode* proj, ProjNode*& success, ProjNode*& fail, PhaseIterGVN* igvn);
+ static void merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
+ static void improve_address_types(Node* l, Node* r, ProjNode* fail, PhaseIterGVN* igvn);
+ bool is_cmp_with_loadrange(ProjNode* proj);
+ bool is_null_check(ProjNode* proj, PhaseIterGVN* igvn);
+ bool is_side_effect_free_test(ProjNode* proj, PhaseIterGVN* igvn);
+ void reroute_side_effect_free_unc(ProjNode* proj, ProjNode* dom_proj, PhaseIterGVN* igvn);
+ ProjNode* uncommon_trap_proj(CallStaticJavaNode*& call) const;
+ bool fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
+
public:
// Degrees of branch prediction probability by order of magnitude:
@@ -348,7 +372,7 @@
virtual const RegMask &out_RegMask() const;
void dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
int is_range_check(Node* &range, Node* &index, jint &offset);
- Node* fold_compares(PhaseGVN* phase);
+ Node* fold_compares(PhaseIterGVN* phase);
static Node* up_one_dom(Node* curr, bool linear_only = false);
// Takes the type of val and filters it through the test represented
--- a/hotspot/src/share/vm/opto/classes.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/classes.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -266,9 +266,13 @@
macro(AddVB)
macro(AddVS)
macro(AddVI)
+macro(AddReductionVI)
macro(AddVL)
+macro(AddReductionVL)
macro(AddVF)
+macro(AddReductionVF)
macro(AddVD)
+macro(AddReductionVD)
macro(SubVB)
macro(SubVS)
macro(SubVI)
@@ -277,8 +281,11 @@
macro(SubVD)
macro(MulVS)
macro(MulVI)
+macro(MulReductionVI)
macro(MulVF)
+macro(MulReductionVF)
macro(MulVD)
+macro(MulReductionVD)
macro(DivVF)
macro(DivVD)
macro(LShiftCntV)
--- a/hotspot/src/share/vm/opto/compile.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/compile.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -3049,6 +3049,15 @@
case Op_StoreVector:
break;
+ case Op_AddReductionVI:
+ case Op_AddReductionVL:
+ case Op_AddReductionVF:
+ case Op_AddReductionVD:
+ case Op_MulReductionVI:
+ case Op_MulReductionVF:
+ case Op_MulReductionVD:
+ break;
+
case Op_PackB:
case Op_PackS:
case Op_PackI:
--- a/hotspot/src/share/vm/opto/graphKit.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -2530,6 +2530,11 @@
// prior to coming here.
Node* Phase::gen_subtype_check(Node* subklass, Node* superklass, Node** ctrl, MergeMemNode* mem, PhaseGVN* gvn) {
Compile* C = gvn->C;
+
+ if ((*ctrl)->is_top()) {
+ return C->top();
+ }
+
// Fast check for identical types, perhaps identical constants.
// The types can even be identical non-constants, in cases
// involving Array.newInstance, Object.clone, etc.
@@ -2792,18 +2797,19 @@
*/
Node* GraphKit::maybe_cast_profiled_obj(Node* obj,
ciKlass* type,
- bool not_null,
- SafePointNode* sfpt) {
+ bool not_null) {
+ if (stopped()) {
+ return obj;
+ }
+
// type == NULL if profiling tells us this object is always null
if (type != NULL) {
Deoptimization::DeoptReason class_reason = Deoptimization::Reason_speculate_class_check;
Deoptimization::DeoptReason null_reason = Deoptimization::Reason_speculate_null_check;
- ciMethod* trap_method = (sfpt == NULL) ? method() : sfpt->jvms()->method();
- int trap_bci = (sfpt == NULL) ? bci() : sfpt->jvms()->bci();
if (!too_many_traps(null_reason) && !too_many_recompiles(null_reason) &&
- !C->too_many_traps(trap_method, trap_bci, class_reason) &&
- !C->too_many_recompiles(trap_method, trap_bci, class_reason)) {
+ !too_many_traps(class_reason) &&
+ !too_many_recompiles(class_reason)) {
Node* not_null_obj = NULL;
// not_null is true if we know the object is not null and
// there's no need for a null check
@@ -2819,12 +2825,7 @@
ciKlass* exact_kls = type;
Node* slow_ctl = type_check_receiver(exact_obj, exact_kls, 1.0,
&exact_obj);
- if (sfpt != NULL) {
- GraphKit kit(sfpt->jvms());
- PreserveJVMState pjvms(&kit);
- kit.set_control(slow_ctl);
- kit.uncommon_trap_exact(class_reason, Deoptimization::Action_maybe_recompile);
- } else {
+ {
PreserveJVMState pjvms(this);
set_control(slow_ctl);
uncommon_trap_exact(class_reason, Deoptimization::Action_maybe_recompile);
--- a/hotspot/src/share/vm/opto/graphKit.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -409,8 +409,7 @@
// Cast obj to type and emit guard unless we had too many traps here already
Node* maybe_cast_profiled_obj(Node* obj,
ciKlass* type,
- bool not_null = false,
- SafePointNode* sfpt = NULL);
+ bool not_null = false);
// Cast obj to not-null on this path
Node* cast_not_null(Node* obj, bool do_replace_in_map = true);
--- a/hotspot/src/share/vm/opto/ifnode.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/ifnode.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -25,11 +25,13 @@
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
+#include "opto/castnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/runtime.hpp"
+#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
// Portions of code courtesy of Clifford Click
@@ -449,62 +451,59 @@
return new ConINode(TypeInt::ZERO);
}
-//------------------------------is_range_check---------------------------------
-// Return 0 if not a range check. Return 1 if a range check and set index and
-// offset. Return 2 if we had to negate the test. Index is NULL if the check
-// is versus a constant.
-int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
+// If this IfNode follows a range check pattern, return the projection
+// for the failed path.
+ProjNode* IfNode::range_check_trap_proj(int& flip_test, Node*& l, Node*& r) {
Node* b = in(1);
- if (b == NULL || !b->is_Bool()) return 0;
+ if (b == NULL || !b->is_Bool()) return NULL;
BoolNode* bn = b->as_Bool();
Node* cmp = bn->in(1);
- if (cmp == NULL) return 0;
- if (cmp->Opcode() != Op_CmpU) return 0;
+ if (cmp == NULL) return NULL;
+ if (cmp->Opcode() != Op_CmpU) return NULL;
- Node* l = cmp->in(1);
- Node* r = cmp->in(2);
- int flip_test = 1;
+ l = cmp->in(1);
+ r = cmp->in(2);
+ flip_test = 1;
if (bn->_test._test == BoolTest::le) {
l = cmp->in(2);
r = cmp->in(1);
flip_test = 2;
} else if (bn->_test._test != BoolTest::lt) {
- return 0;
+ return NULL;
}
- if (l->is_top()) return 0; // Top input means dead test
- if (r->Opcode() != Op_LoadRange) return 0;
+ if (l->is_top()) return NULL; // Top input means dead test
+ if (r->Opcode() != Op_LoadRange) return NULL;
// We have recognized one of these forms:
// Flip 1: If (Bool[<] CmpU(l, LoadRange)) ...
// Flip 2: If (Bool[<=] CmpU(LoadRange, l)) ...
+ ProjNode* iftrap = proj_out(flip_test == 2 ? true : false);
+ return iftrap;
+}
+
+
+//------------------------------is_range_check---------------------------------
+// Return 0 if not a range check. Return 1 if a range check and set index and
+// offset. Return 2 if we had to negate the test. Index is NULL if the check
+// is versus a constant.
+int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
+ int flip_test = 0;
+ Node* l = NULL;
+ Node* r = NULL;
+ ProjNode* iftrap = range_check_trap_proj(flip_test, l, r);
+
+ if (iftrap == NULL) {
+ return 0;
+ }
+
// Make sure it's a real range check by requiring an uncommon trap
// along the OOB path. Otherwise, it's possible that the user wrote
// something which optimized to look like a range check but behaves
// in some other way.
- Node* iftrap = proj_out(flip_test == 2 ? true : false);
- bool found_trap = false;
- if (iftrap != NULL) {
- Node* u = iftrap->unique_ctrl_out();
- if (u != NULL) {
- // It could be a merge point (Region) for uncommon trap.
- if (u->is_Region()) {
- Node* c = u->unique_ctrl_out();
- if (c != NULL) {
- iftrap = u;
- u = c;
- }
- }
- if (u->in(0) == iftrap && u->is_CallStaticJava()) {
- int req = u->as_CallStaticJava()->uncommon_trap_request();
- if (Deoptimization::trap_request_reason(req) ==
- Deoptimization::Reason_range_check) {
- found_trap = true;
- }
- }
- }
+ if (iftrap->is_uncommon_trap_proj(Deoptimization::Reason_range_check) == NULL) {
+ return 0;
}
- if (!found_trap) return 0; // sorry, no cigar
// Look for index+offset form
Node* ind = l;
@@ -664,11 +663,12 @@
//------------------------------fold_compares----------------------------
// See if a pair of CmpIs can be converted into a CmpU. In some cases
// the direction of this if is determined by the preceding if so it
-// can be eliminate entirely. Given an if testing (CmpI n c) check
-// for an immediately control dependent if that is testing (CmpI n c2)
-// and has one projection leading to this if and the other projection
-// leading to a region that merges one of this ifs control
-// projections.
+// can be eliminated entirely.
+//
+// Given an if testing (CmpI n v) check for an immediately control
+// dependent if that is testing (CmpI n v2) and has one projection
+// leading to this if and the other projection leading to a region
+// that merges one of this if's control projections.
//
// If
// / |
@@ -680,79 +680,458 @@
// / \ |
// / Region
//
-Node* IfNode::fold_compares(PhaseGVN* phase) {
- if (Opcode() != Op_If) return NULL;
+// Or given an if testing (CmpI n v) check for a dominating if that is
+// testing (CmpI n v2), both having one projection leading to an
+// uncommon trap. Allow another independent guard in between to cover
+// an explicit range check:
+// if (index < 0 || index >= array.length) {
+// which may need a null check to guard the LoadRange
+//
+// If
+// / \
+// / \
+// / \
+// If unc
+// /\
+// / \
+// / \
+// / unc
+//
+
+// Is the comparison for this If suitable for folding?
+bool IfNode::cmpi_folds(PhaseIterGVN* igvn) {
+ return in(1) != NULL &&
+ in(1)->is_Bool() &&
+ in(1)->in(1) != NULL &&
+ in(1)->in(1)->Opcode() == Op_CmpI &&
+ in(1)->in(1)->in(2) != NULL &&
+ in(1)->in(1)->in(2) != igvn->C->top() &&
+ (in(1)->as_Bool()->_test.is_less() ||
+ in(1)->as_Bool()->_test.is_greater());
+}
+
+// Is a dominating control suitable for folding with this if?
+bool IfNode::is_ctrl_folds(Node* ctrl, PhaseIterGVN* igvn) {
+ return ctrl != NULL &&
+ ctrl->is_Proj() &&
+ ctrl->in(0) != NULL &&
+ ctrl->in(0)->is_If() &&
+ ctrl->in(0)->outcnt() == 2 &&
+ ctrl->in(0)->as_If()->cmpi_folds(igvn) &&
+ // Must compare same value
+ ctrl->in(0)->in(1)->in(1)->in(1) != NULL &&
+ ctrl->in(0)->in(1)->in(1)->in(1) == in(1)->in(1)->in(1);
+}
+
+// Do this If and the dominating If share a region?
+bool IfNode::has_shared_region(ProjNode* proj, ProjNode*& success, ProjNode*& fail) {
+ ProjNode* otherproj = proj->other_if_proj();
+ Node* otherproj_ctrl_use = otherproj->unique_ctrl_out();
+ RegionNode* region = (otherproj_ctrl_use != NULL && otherproj_ctrl_use->is_Region()) ? otherproj_ctrl_use->as_Region() : NULL;
+ success = NULL;
+ fail = NULL;
+ if (otherproj->outcnt() == 1 && region != NULL && !region->has_phi()) {
+ for (int i = 0; i < 2; i++) {
+ ProjNode* proj = proj_out(i);
+ if (success == NULL && proj->outcnt() == 1 && proj->unique_out() == region) {
+ success = proj;
+ } else if (fail == NULL) {
+ fail = proj;
+ } else {
+ success = fail = NULL;
+ }
+ }
+ }
+ return success != NULL && fail != NULL;
+}
+
+// Return the projection that leads to an uncommon trap, if any
+ProjNode* IfNode::uncommon_trap_proj(CallStaticJavaNode*& call) const {
+ for (int i = 0; i < 2; i++) {
+ call = proj_out(i)->is_uncommon_trap_proj(Deoptimization::Reason_none);
+ if (call != NULL) {
+ return proj_out(i);
+ }
+ }
+ return NULL;
+}
+
+// Do this If and the dominating If both branch out to an uncommon trap?
+bool IfNode::has_only_uncommon_traps(ProjNode* proj, ProjNode*& success, ProjNode*& fail, PhaseIterGVN* igvn) {
+ ProjNode* otherproj = proj->other_if_proj();
+ CallStaticJavaNode* dom_unc = otherproj->is_uncommon_trap_proj(Deoptimization::Reason_none);
+
+ if (otherproj->outcnt() == 1 && dom_unc != NULL) {
+ CallStaticJavaNode* unc = NULL;
+ ProjNode* unc_proj = uncommon_trap_proj(unc);
+ if (unc_proj != NULL && unc_proj->outcnt() == 1) {
+ if (dom_unc == unc) {
+ // Allow the uncommon trap to be shared through a region
+ RegionNode* r = unc->in(0)->as_Region();
+ if (r->outcnt() != 2 || r->req() != 3 || r->find_edge(otherproj) == -1 || r->find_edge(unc_proj) == -1) {
+ return false;
+ }
+ assert(r->has_phi() == NULL, "simple region shouldn't have a phi");
+ } else if (dom_unc->in(0) != otherproj || unc->in(0) != unc_proj) {
+ return false;
+ }
+ // See merge_uncommon_traps: the reason of the uncommon trap
+ // will be changed and the state of the dominating If will be
+ // used. Check that we didn't apply this transformation in a
+ // previous compilation and it didn't cause too many traps
+ if (!igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_unstable_fused_if) &&
+ !igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_range_check)) {
+ success = unc_proj;
+ fail = unc_proj->other_if_proj();
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// Check that the 2 CmpI can be folded into a single CmpU and proceed with the folding
+bool IfNode::fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) {
Node* this_cmp = in(1)->in(1);
- if (this_cmp != NULL && this_cmp->Opcode() == Op_CmpI &&
- this_cmp->in(2)->is_Con() && this_cmp->in(2) != phase->C->top()) {
- Node* ctrl = in(0);
- BoolNode* this_bool = in(1)->as_Bool();
- Node* n = this_cmp->in(1);
- int hi = this_cmp->in(2)->get_int();
- if (ctrl != NULL && ctrl->is_Proj() && ctrl->outcnt() == 1 &&
- ctrl->in(0)->is_If() &&
- ctrl->in(0)->outcnt() == 2 &&
- ctrl->in(0)->in(1)->is_Bool() &&
- ctrl->in(0)->in(1)->in(1)->Opcode() == Op_CmpI &&
- ctrl->in(0)->in(1)->in(1)->in(2)->is_Con() &&
- ctrl->in(0)->in(1)->in(1)->in(2) != phase->C->top() &&
- ctrl->in(0)->in(1)->in(1)->in(1) == n) {
- IfNode* dom_iff = ctrl->in(0)->as_If();
- Node* otherproj = dom_iff->proj_out(!ctrl->as_Proj()->_con);
- if (otherproj->outcnt() == 1 && otherproj->unique_out()->is_Region() &&
- this_bool->_test._test != BoolTest::ne && this_bool->_test._test != BoolTest::eq) {
- // Identify which proj goes to the region and which continues on
- RegionNode* region = otherproj->unique_out()->as_Region();
- Node* success = NULL;
- Node* fail = NULL;
- for (int i = 0; i < 2; i++) {
- Node* proj = proj_out(i);
- if (success == NULL && proj->outcnt() == 1 && proj->unique_out() == region) {
- success = proj;
- } else if (fail == NULL) {
- fail = proj;
- } else {
- success = fail = NULL;
+ BoolNode* this_bool = in(1)->as_Bool();
+ IfNode* dom_iff = proj->in(0)->as_If();
+ BoolNode* dom_bool = dom_iff->in(1)->as_Bool();
+ Node* lo = dom_iff->in(1)->in(1)->in(2);
+ Node* hi = this_cmp->in(2);
+ Node* n = this_cmp->in(1);
+ ProjNode* otherproj = proj->other_if_proj();
+
+ const TypeInt* lo_type = IfNode::filtered_int_type(igvn, n, otherproj);
+ const TypeInt* hi_type = IfNode::filtered_int_type(igvn, n, success);
+
+ BoolTest::mask lo_test = dom_bool->_test._test;
+ BoolTest::mask hi_test = this_bool->_test._test;
+ BoolTest::mask cond = hi_test;
+
+ // Figure out which of the two tests sets the upper bound and which
+ // sets the lower bound if any.
+ if (hi_type->_lo > lo_type->_hi && hi_type->_hi == max_jint && lo_type->_lo == min_jint) {
+
+ assert((dom_bool->_test.is_less() && !proj->_con) ||
+ (dom_bool->_test.is_greater() && proj->_con), "incorrect test");
+ // this test was canonicalized
+ assert(this_bool->_test.is_less() && fail->_con, "incorrect test");
+
+ if (lo_test == BoolTest::gt || lo_test == BoolTest::le) {
+ lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
+ }
+ } else if (lo_type->_lo > hi_type->_hi && lo_type->_hi == max_jint && hi_type->_lo == min_jint) {
+ swap(lo, hi);
+ swap(lo_type, hi_type);
+ swap(lo_test, hi_test);
+
+ assert((this_bool->_test.is_less() && proj->_con) ||
+ (this_bool->_test.is_greater() && !proj->_con), "incorrect test");
+ // this test was canonicalized
+ assert(dom_bool->_test.is_less() && !fail->_con, "incorrect test");
+
+ cond = (hi_test == BoolTest::le || hi_test == BoolTest::gt) ? BoolTest::gt : BoolTest::ge;
+
+ if (lo_test == BoolTest::le) {
+ lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
+ }
+
+ } else {
+ const TypeInt* failtype = filtered_int_type(igvn, n, proj);
+ if (failtype != NULL) {
+ const TypeInt* type2 = filtered_int_type(igvn, n, fail);
+ if (type2 != NULL) {
+ failtype = failtype->join(type2)->is_int();
+ if (failtype->_lo > failtype->_hi) {
+ // previous if determines the result of this if so
+ // replace Bool with constant
+ igvn->hash_delete(this);
+ set_req(1, igvn->intcon(success->_con));
+ return true;
+ }
+ }
+ }
+
+ lo = NULL;
+ hi = NULL;
+ }
+
+ if (lo && hi) {
+ // Merge the two compares into a single unsigned compare by building (CmpU (n - lo) (hi - lo))
+ Node* adjusted_val = igvn->transform(new SubINode(n, lo));
+ Node* adjusted_lim = igvn->transform(new SubINode(hi, lo));
+ Node* newcmp = igvn->transform(new CmpUNode(adjusted_val, adjusted_lim));
+ Node* newbool = igvn->transform(new BoolNode(newcmp, cond));
+
+ igvn->is_IterGVN()->replace_input_of(dom_iff, 1, igvn->intcon(proj->_con));
+ igvn->hash_delete(this);
+ set_req(1, newbool);
+
+ return true;
+ }
+ return false;
+}
+
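The merge above builds (CmpU (n - lo) (hi - lo)), relying on the standard identity that two signed bound checks collapse into one unsigned comparison. A minimal standalone sketch of that identity (plain C++, not HotSpot code; assumes 32-bit two's-complement ints and lo <= hi, which the bound-selection logic above establishes):

#include <cassert>
#include <cstdint>

// "lo <= n && n < hi" folded into a single unsigned compare, mirroring the
// SubI/CmpU nodes built by fold_compares_helper(). Subtraction is done in
// unsigned arithmetic so wrap-around is well defined.
static bool in_range_folded(int32_t n, int32_t lo, int32_t hi) {
  return (uint32_t)((uint32_t)n - (uint32_t)lo) <
         (uint32_t)((uint32_t)hi - (uint32_t)lo);
}

int main() {
  const int32_t lo = 0, hi = 10;   // e.g. 0 <= index < array.length
  for (int32_t n : { INT32_MIN, -1, 0, 1, 9, 10, INT32_MAX }) {
    assert(in_range_folded(n, lo, hi) == (lo <= n && n < hi));
  }
  return 0;
}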
+// Merge the branches that trap for this If and the dominating If into
+// a single region that branches to the uncommon trap for the
+// dominating If
+void IfNode::merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) {
+ ProjNode* otherproj = proj->other_if_proj();
+
+ CallStaticJavaNode* unc = success->is_uncommon_trap_proj(Deoptimization::Reason_none);
+ CallStaticJavaNode* dom_unc = otherproj->is_uncommon_trap_proj(Deoptimization::Reason_none);
+
+ if (unc != dom_unc) {
+ Node* r = new RegionNode(3);
+
+ r->set_req(1, otherproj);
+ r->set_req(2, success);
+ r = igvn->transform(r);
+ assert(r->is_Region(), "can't go away");
+
+ // Make both If trap at the state of the first If: once the CmpI
+ // nodes are merged, if we trap we don't know which of the CmpI
+ // nodes would have caused the trap so we have to restart
+ // execution at the first one
+ igvn->replace_input_of(dom_unc, 0, r);
+ igvn->replace_input_of(unc, 0, igvn->C->top());
+ }
+ int trap_request = dom_unc->uncommon_trap_request();
+ Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(trap_request);
+ Deoptimization::DeoptAction action = Deoptimization::trap_request_action(trap_request);
+
+ int flip_test = 0;
+ Node* l = NULL;
+ Node* r = NULL;
+
+ if (success->in(0)->as_If()->range_check_trap_proj(flip_test, l, r) != NULL) {
+ // If this looks like a range check, change the trap to
+ // Reason_range_check so the compiler recognizes it as a range
+ // check and applies the corresponding optimizations
+ trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_range_check, action);
+
+ improve_address_types(l, r, fail, igvn);
+ } else if (unc != dom_unc) {
+ // If we trap we won't know which CmpI would have caused the trap
+ // so use a special trap reason to mark this pair of CmpI nodes as
+ // a bad candidate for folding. On recompilation we won't fold them
+ // and we may trap again but this time we'll know which branch
+ // traps
+ trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_unstable_fused_if, action);
+ }
+ igvn->replace_input_of(dom_unc, TypeFunc::Parms, igvn->intcon(trap_request));
+}
+
+// If we are turning 2 CmpI nodes into a CmpU that follows the pattern
+// of a range check on index i, on 64-bit the compares may be followed
+// by memory accesses using i as index. In that case, the CmpU tells
+// us something about the values taken by i that can help the compiler
+// (see Compile::conv_I2X_index())
+void IfNode::improve_address_types(Node* l, Node* r, ProjNode* fail, PhaseIterGVN* igvn) {
+#ifdef _LP64
+ ResourceMark rm;
+ Node_Stack stack(2);
+
+ assert(r->Opcode() == Op_LoadRange, "unexpected range check");
+ const TypeInt* array_size = igvn->type(r)->is_int();
+
+ stack.push(l, 0);
+
+ while(stack.size() > 0) {
+ Node* n = stack.node();
+ uint start = stack.index();
+
+ uint i = start;
+ for (; i < n->outcnt(); i++) {
+ Node* use = n->raw_out(i);
+ if (stack.size() == 1) {
+ if (use->Opcode() == Op_ConvI2L) {
+ const TypeLong* bounds = use->as_Type()->type()->is_long();
+ if (bounds->_lo <= array_size->_lo && bounds->_hi >= array_size->_hi &&
+ (bounds->_lo != array_size->_lo || bounds->_hi != array_size->_hi)) {
+ stack.set_index(i+1);
+ stack.push(use, 0);
+ break;
}
}
- if (success != NULL && fail != NULL && !region->has_phi()) {
- int lo = dom_iff->in(1)->in(1)->in(2)->get_int();
- BoolNode* dom_bool = dom_iff->in(1)->as_Bool();
- Node* dom_cmp = dom_bool->in(1);
- const TypeInt* failtype = filtered_int_type(phase, n, ctrl);
- if (failtype != NULL) {
- const TypeInt* type2 = filtered_int_type(phase, n, fail);
- if (type2 != NULL) {
- failtype = failtype->join(type2)->is_int();
- } else {
- failtype = NULL;
- }
- }
+ } else if (use->is_Mem()) {
+ Node* ctrl = use->in(0);
+ for (int i = 0; i < 10 && ctrl != NULL && ctrl != fail; i++) {
+ ctrl = up_one_dom(ctrl);
+ }
+ if (ctrl == fail) {
+ Node* init_n = stack.node_at(1);
+ assert(init_n->Opcode() == Op_ConvI2L, "unexpected first node");
+ Node* new_n = igvn->C->conv_I2X_index(igvn, l, array_size);
- if (failtype != NULL &&
- dom_bool->_test._test != BoolTest::ne && dom_bool->_test._test != BoolTest::eq) {
- int bound = failtype->_hi - failtype->_lo + 1;
- if (failtype->_hi != max_jint && failtype->_lo != min_jint && bound > 1) {
- // Merge the two compares into a single unsigned compare by building (CmpU (n - lo) hi)
- BoolTest::mask cond = fail->as_Proj()->_con ? BoolTest::lt : BoolTest::ge;
- Node* adjusted = phase->transform(new SubINode(n, phase->intcon(failtype->_lo)));
- Node* newcmp = phase->transform(new CmpUNode(adjusted, phase->intcon(bound)));
- Node* newbool = phase->transform(new BoolNode(newcmp, cond));
- phase->is_IterGVN()->replace_input_of(dom_iff, 1, phase->intcon(ctrl->as_Proj()->_con));
- phase->hash_delete(this);
- set_req(1, newbool);
- return this;
- }
- if (failtype->_lo > failtype->_hi) {
- // previous if determines the result of this if so
- // replace Bool with constant
- phase->hash_delete(this);
- set_req(1, phase->intcon(success->as_Proj()->_con));
- return this;
- }
+ for (uint j = 2; j < stack.size(); j++) {
+ Node* n = stack.node_at(j);
+ Node* clone = n->clone();
+ int rep = clone->replace_edge(init_n, new_n);
+ assert(rep > 0, "can't find expected node?");
+ clone = igvn->transform(clone);
+ init_n = n;
+ new_n = clone;
+ }
+ igvn->hash_delete(use);
+ int rep = use->replace_edge(init_n, new_n);
+ assert(rep > 0, "can't find expected node?");
+ igvn->transform(use);
+ if (init_n->outcnt() == 0) {
+ igvn->_worklist.push(init_n);
}
}
+ } else if (use->in(0) == NULL && (igvn->type(use)->isa_long() ||
+ igvn->type(use)->isa_ptr())) {
+ stack.set_index(i+1);
+ stack.push(use, 0);
+ break;
+ }
+ }
+ if (i == n->outcnt()) {
+ stack.pop();
+ }
+ }
+#endif
+}
+
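The benefit on 64 bit can be summarized as follows: once the fused unsigned check has proven 0 <= i < array.length, widening i for address arithmetic is value-preserving, which is what the retyped ConvI2L produced by conv_I2X_index() expresses. A simplified standalone illustration (not HotSpot code; the real gain is the tighter node type, shown here only through the sign/zero-extension agreement):

#include <cassert>
#include <cstdint>

// For an index proven to be in [0, length), sign extension (what ConvI2L
// does) and zero extension agree, so the 64-bit scaled offset can be formed
// without any extra range fix-ups.
int64_t scaled_offset(int32_t i, int32_t length, int64_t elem_size) {
  assert(0 <= i && i < length);           // established by the CmpU fast path
  int64_t sext = (int64_t)i;              // ConvI2L of the index
  int64_t zext = (int64_t)(uint32_t)i;    // agrees because i is non-negative
  assert(sext == zext);
  return sext * elem_size;
}

int main() {
  assert(scaled_offset(3, 8, 4) == 12);   // e.g. int[8], element 3
  return 0;
}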
+bool IfNode::is_cmp_with_loadrange(ProjNode* proj) {
+ if (in(1) != NULL &&
+ in(1)->in(1) != NULL &&
+ in(1)->in(1)->in(2) != NULL) {
+ Node* other = in(1)->in(1)->in(2);
+ if (other->Opcode() == Op_LoadRange &&
+ ((other->in(0) != NULL && other->in(0) == proj) ||
+ (other->in(0) == NULL &&
+ other->in(2) != NULL &&
+ other->in(2)->is_AddP() &&
+ other->in(2)->in(1) != NULL &&
+ other->in(2)->in(1)->Opcode() == Op_CastPP &&
+ other->in(2)->in(1)->in(0) == proj))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool IfNode::is_null_check(ProjNode* proj, PhaseIterGVN* igvn) {
+ Node* other = in(1)->in(1)->in(2);
+ if (other->in(MemNode::Address) != NULL &&
+ proj->in(0)->in(1) != NULL &&
+ proj->in(0)->in(1)->is_Bool() &&
+ proj->in(0)->in(1)->in(1) != NULL &&
+ proj->in(0)->in(1)->in(1)->Opcode() == Op_CmpP &&
+ proj->in(0)->in(1)->in(1)->in(2) != NULL &&
+ proj->in(0)->in(1)->in(1)->in(1) == other->in(MemNode::Address)->in(AddPNode::Address)->uncast() &&
+ igvn->type(proj->in(0)->in(1)->in(1)->in(2)) == TypePtr::NULL_PTR) {
+ return true;
+ }
+ return false;
+}
+
+// Check that the If that is in between the 2 integer comparisons has
+// no side effect
+bool IfNode::is_side_effect_free_test(ProjNode* proj, PhaseIterGVN* igvn) {
+ if (proj != NULL &&
+ proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
+ proj->outcnt() <= 2) {
+ if (proj->outcnt() == 1 ||
+ // Allow simple null check from LoadRange
+ (is_cmp_with_loadrange(proj) && is_null_check(proj, igvn))) {
+ CallStaticJavaNode* unc = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
+ CallStaticJavaNode* dom_unc = proj->in(0)->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
+
+ // reroute_side_effect_free_unc changes the state of this
+ // uncommon trap to restart execution at the previous
+ // CmpI. Check that this change in a previous compilation didn't
+ // cause too many traps.
+ int trap_request = unc->uncommon_trap_request();
+ Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(trap_request);
+
+ if (igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), reason)) {
+ return false;
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
+// Make the If between the 2 integer comparisons trap at the state of
+// the first If: the last CmpI is the one replaced by a CmpU and the
+// first CmpI is eliminated, so the test between the 2 CmpI nodes
+// won't be guarded by the first CmpI anymore. It can trap in cases
+// where the first CmpI would have prevented it from executing: on a
+// trap, we need to restart execution at the state of the first CmpI
+void IfNode::reroute_side_effect_free_unc(ProjNode* proj, ProjNode* dom_proj, PhaseIterGVN* igvn) {
+ CallStaticJavaNode* dom_unc = dom_proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
+ ProjNode* otherproj = proj->other_if_proj();
+ CallStaticJavaNode* unc = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
+ Node* call_proj = dom_unc->unique_ctrl_out();
+ Node* halt = call_proj->unique_ctrl_out();
+
+ Node* new_unc = dom_unc->clone();
+ call_proj = call_proj->clone();
+ halt = halt->clone();
+ Node* c = otherproj->clone();
+
+ c = igvn->transform(c);
+ new_unc->set_req(TypeFunc::Parms, unc->in(TypeFunc::Parms));
+ new_unc->set_req(0, c);
+ new_unc = igvn->transform(new_unc);
+ call_proj->set_req(0, new_unc);
+ call_proj = igvn->transform(call_proj);
+ halt->set_req(0, call_proj);
+ halt = igvn->transform(halt);
+
+ igvn->replace_node(otherproj, igvn->C->top());
+ igvn->C->root()->add_req(halt);
+}
+
+Node* IfNode::fold_compares(PhaseIterGVN* igvn) {
+ if (Opcode() != Op_If) return NULL;
+
+ if (cmpi_folds(igvn)) {
+ Node* ctrl = in(0);
+ if (is_ctrl_folds(ctrl, igvn) &&
+ ctrl->outcnt() == 1) {
+ // An integer comparison immediately dominated by another integer
+ // comparison
+ ProjNode* success = NULL;
+ ProjNode* fail = NULL;
+ ProjNode* dom_cmp = ctrl->as_Proj();
+ if (has_shared_region(dom_cmp, success, fail) &&
+ // Next call modifies graph so must be last
+ fold_compares_helper(dom_cmp, success, fail, igvn)) {
+ return this;
+ }
+ if (has_only_uncommon_traps(dom_cmp, success, fail, igvn) &&
+ // Next call modifies graph so must be last
+ fold_compares_helper(dom_cmp, success, fail, igvn)) {
+ merge_uncommon_traps(dom_cmp, success, fail, igvn);
+ return this;
+ }
+ return NULL;
+ } else if (ctrl->in(0) != NULL &&
+ ctrl->in(0)->in(0) != NULL) {
+ ProjNode* success = NULL;
+ ProjNode* fail = NULL;
+ Node* dom = ctrl->in(0)->in(0);
+ ProjNode* dom_cmp = dom->isa_Proj();
+ ProjNode* other_cmp = ctrl->isa_Proj();
+
+ // Check if it's an integer comparison dominated by another
+ // integer comparison with another test in between
+ if (is_ctrl_folds(dom, igvn) &&
+ has_only_uncommon_traps(dom_cmp, success, fail, igvn) &&
+ is_side_effect_free_test(other_cmp, igvn) &&
+ // Next call modifies graph so must be last
+ fold_compares_helper(dom_cmp, success, fail, igvn)) {
+ reroute_side_effect_free_unc(other_cmp, dom_cmp, igvn);
+ merge_uncommon_traps(dom_cmp, success, fail, igvn);
+ return this;
}
}
}
@@ -1029,7 +1408,7 @@
// Normal equivalent-test check.
if( !dom ) return NULL; // Dead loop?
- Node* result = fold_compares(phase);
+ Node* result = fold_compares(igvn);
if (result != NULL) {
return result;
}
@@ -1089,7 +1468,7 @@
// be skipped. For example, range check predicate has two checks
// for lower and upper bounds.
ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj();
- if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate))
+ if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL)
prev_dom = idom;
// Now walk the current IfNode's projections.
--- a/hotspot/src/share/vm/opto/library_call.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -262,6 +262,9 @@
bool inline_arraycopy();
AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
RegionNode* slow_region);
+ JVMState* arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp);
+ void arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms, int saved_reexecute_sp);
+
typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind);
bool inline_unsafe_ordered_store(BasicType type);
@@ -290,6 +293,7 @@
bool inline_multiplyToLen();
bool inline_profileBoolean();
+ bool inline_isCompileConstant();
};
@@ -549,6 +553,17 @@
if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL;
break;
+ case vmIntrinsics::_getShortUnaligned:
+ case vmIntrinsics::_getCharUnaligned:
+ case vmIntrinsics::_getIntUnaligned:
+ case vmIntrinsics::_getLongUnaligned:
+ case vmIntrinsics::_putShortUnaligned:
+ case vmIntrinsics::_putCharUnaligned:
+ case vmIntrinsics::_putIntUnaligned:
+ case vmIntrinsics::_putLongUnaligned:
+ if (!UseUnalignedAccesses) return NULL;
+ break;
+
default:
assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -799,6 +814,16 @@
case vmIntrinsics::_putFloatVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, is_volatile);
case vmIntrinsics::_putDoubleVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, is_volatile);
+ case vmIntrinsics::_getShortUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile);
+ case vmIntrinsics::_getCharUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile);
+ case vmIntrinsics::_getIntUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile);
+ case vmIntrinsics::_getLongUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile);
+
+ case vmIntrinsics::_putShortUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile);
+ case vmIntrinsics::_putCharUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile);
+ case vmIntrinsics::_putIntUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile);
+ case vmIntrinsics::_putLongUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile);
+
case vmIntrinsics::_compareAndSwapObject: return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
case vmIntrinsics::_compareAndSwapInt: return inline_unsafe_load_store(T_INT, LS_cmpxchg);
case vmIntrinsics::_compareAndSwapLong: return inline_unsafe_load_store(T_LONG, LS_cmpxchg);
@@ -900,6 +925,8 @@
case vmIntrinsics::_profileBoolean:
return inline_profileBoolean();
+ case vmIntrinsics::_isCompileConstant:
+ return inline_isCompileConstant();
default:
// If you get here, it may be that someone has added a new intrinsic
@@ -3664,6 +3691,11 @@
//---------------------generate_array_guard_common------------------------
Node* LibraryCallKit::generate_array_guard_common(Node* kls, RegionNode* region,
bool obj_array, bool not_array) {
+
+ if (stopped()) {
+ return NULL;
+ }
+
// If obj_array/non_array==false/false:
// Branch around if the given klass is in fact an array (either obj or prim).
// If obj_array/non_array==false/true:
@@ -4674,6 +4706,141 @@
return true;
}
+// If we have a tightly coupled allocation, the arraycopy may take care
+// of the array initialization. If one of the guards we insert between
+// the allocation and the arraycopy causes a deoptimization, an
+// uninitialized array will escape the compiled method. To prevent that,
+// we set the JVM state for uncommon traps between the allocation and
+// the arraycopy to the state before the allocation so, in case of
+// deoptimization, we'll reexecute the allocation and the
+// initialization.
+JVMState* LibraryCallKit::arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp) {
+ if (alloc != NULL) {
+ ciMethod* trap_method = alloc->jvms()->method();
+ int trap_bci = alloc->jvms()->bci();
+
+ if (!C->too_many_traps(trap_method, trap_bci, Deoptimization::Reason_intrinsic) &&
+ !C->too_many_traps(trap_method, trap_bci, Deoptimization::Reason_null_check)) {
+ // Make sure there's no store between the allocation and the
+ // arraycopy, otherwise visible side effects could be re-executed
+ // in case of deoptimization and cause incorrect execution.
+ bool no_interfering_store = true;
+ Node* mem = alloc->in(TypeFunc::Memory);
+ if (mem->is_MergeMem()) {
+ for (MergeMemStream mms(merged_memory(), mem->as_MergeMem()); mms.next_non_empty2(); ) {
+ Node* n = mms.memory();
+ if (n != mms.memory2() && !(n->is_Proj() && n->in(0) == alloc->initialization())) {
+ assert(n->is_Store(), "what else?");
+ no_interfering_store = false;
+ break;
+ }
+ }
+ } else {
+ for (MergeMemStream mms(merged_memory()); mms.next_non_empty(); ) {
+ Node* n = mms.memory();
+ if (n != mem && !(n->is_Proj() && n->in(0) == alloc->initialization())) {
+ assert(n->is_Store(), "what else?");
+ no_interfering_store = false;
+ break;
+ }
+ }
+ }
+
+ if (no_interfering_store) {
+ JVMState* old_jvms = alloc->jvms()->clone_shallow(C);
+ uint size = alloc->req();
+ SafePointNode* sfpt = new SafePointNode(size, old_jvms);
+ old_jvms->set_map(sfpt);
+ for (uint i = 0; i < size; i++) {
+ sfpt->init_req(i, alloc->in(i));
+ }
+ // re-push array length for deoptimization
+ sfpt->ins_req(old_jvms->stkoff() + old_jvms->sp(), alloc->in(AllocateNode::ALength));
+ old_jvms->set_sp(old_jvms->sp()+1);
+ old_jvms->set_monoff(old_jvms->monoff()+1);
+ old_jvms->set_scloff(old_jvms->scloff()+1);
+ old_jvms->set_endoff(old_jvms->endoff()+1);
+ old_jvms->set_should_reexecute(true);
+
+ sfpt->set_i_o(map()->i_o());
+ sfpt->set_memory(map()->memory());
+ sfpt->set_control(map()->control());
+
+ JVMState* saved_jvms = jvms();
+ saved_reexecute_sp = _reexecute_sp;
+
+ set_jvms(sfpt->jvms());
+ _reexecute_sp = jvms()->sp();
+
+ return saved_jvms;
+ }
+ }
+ }
+ return NULL;
+}
+
+// In case of a deoptimization, we restart execution at the
+// allocation, allocating a new array. We would leave an uninitialized
+// array in the heap that GCs wouldn't expect. Move the allocation
+// after the traps so we don't allocate the array if we
+// deoptimize. This is possible because tightly_coupled_allocation()
+// guarantees there's no observer of the allocated array at this point
+// and the control flow is simple enough.
+void LibraryCallKit::arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms, int saved_reexecute_sp) {
+ if (saved_jvms != NULL && !stopped()) {
+ assert(alloc != NULL, "only with a tightly coupled allocation");
+ // restore JVM state to the state at the arraycopy
+ saved_jvms->map()->set_control(map()->control());
+ assert(saved_jvms->map()->memory() == map()->memory(), "memory state changed?");
+ assert(saved_jvms->map()->i_o() == map()->i_o(), "IO state changed?");
+ // If we've improved the types of some nodes (null check) while
+ // emitting the guards, propagate them to the current state
+ map()->replaced_nodes().apply(saved_jvms->map());
+ set_jvms(saved_jvms);
+ _reexecute_sp = saved_reexecute_sp;
+
+ // Remove the allocation from above the guards
+ CallProjections callprojs;
+ alloc->extract_projections(&callprojs, true);
+ InitializeNode* init = alloc->initialization();
+ Node* alloc_mem = alloc->in(TypeFunc::Memory);
+ C->gvn_replace_by(callprojs.fallthrough_ioproj, alloc->in(TypeFunc::I_O));
+ C->gvn_replace_by(init->proj_out(TypeFunc::Memory), alloc_mem);
+ C->gvn_replace_by(init->proj_out(TypeFunc::Control), alloc->in(0));
+
+ // move the allocation here (after the guards)
+ _gvn.hash_delete(alloc);
+ alloc->set_req(TypeFunc::Control, control());
+ alloc->set_req(TypeFunc::I_O, i_o());
+ Node *mem = reset_memory();
+ set_all_memory(mem);
+ alloc->set_req(TypeFunc::Memory, mem);
+ set_control(init->proj_out(TypeFunc::Control));
+ set_i_o(callprojs.fallthrough_ioproj);
+
+ // Update memory as done in GraphKit::set_output_for_allocation()
+ const TypeInt* length_type = _gvn.find_int_type(alloc->in(AllocateNode::ALength));
+ const TypeOopPtr* ary_type = _gvn.type(alloc->in(AllocateNode::KlassNode))->is_klassptr()->as_instance_type();
+ if (ary_type->isa_aryptr() && length_type != NULL) {
+ ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
+ }
+ const TypePtr* telemref = ary_type->add_offset(Type::OffsetBot);
+ int elemidx = C->get_alias_index(telemref);
+ set_memory(init->proj_out(TypeFunc::Memory), Compile::AliasIdxRaw);
+ set_memory(init->proj_out(TypeFunc::Memory), elemidx);
+
+ Node* allocx = _gvn.transform(alloc);
+ assert(allocx == alloc, "where has the allocation gone?");
+ assert(dest->is_CheckCastPP(), "not an allocation result?");
+
+ _gvn.hash_delete(dest);
+ dest->set_req(0, control());
+ Node* destx = _gvn.transform(dest);
+ assert(destx == dest, "where has the allocation result gone?");
+ }
+}
+
+
//------------------------------inline_arraycopy-----------------------
// public static native void java.lang.System.arraycopy(Object src, int srcPos,
// Object dest, int destPos,
@@ -4686,6 +4853,19 @@
Node* dest_offset = argument(3); // type: int
Node* length = argument(4); // type: int
+
+ // Check for allocation before we add nodes that would confuse
+ // tightly_coupled_allocation()
+ AllocateArrayNode* alloc = tightly_coupled_allocation(dest, NULL);
+
+ int saved_reexecute_sp = -1;
+ JVMState* saved_jvms = arraycopy_restore_alloc_state(alloc, saved_reexecute_sp);
+ // See arraycopy_restore_alloc_state() comment
+ // if alloc == NULL we don't have to worry about a tightly coupled allocation so we can emit all needed guards
+ // if saved_jvms != NULL (then alloc != NULL) then we can handle guards and a tightly coupled allocation
+ // if saved_jvms == NULL and alloc != NULL, we can't emit any guards
+ bool can_emit_guards = (alloc == NULL || saved_jvms != NULL);
+
// The following tests must be performed
// (1) src and dest are arrays.
// (2) src and dest arrays must have elements of the same BasicType
@@ -4699,42 +4879,20 @@
// (3) src and dest must not be null.
// always do this here because we need the JVM state for uncommon traps
- src = null_check(src, T_ARRAY);
+ Node* null_ctl = top();
+ src = saved_jvms != NULL ? null_check_oop(src, &null_ctl, true, true) : null_check(src, T_ARRAY);
+ assert(null_ctl->is_top(), "no null control here");
dest = null_check(dest, T_ARRAY);
- // Check for allocation before we add nodes that would confuse
- // tightly_coupled_allocation()
- AllocateArrayNode* alloc = tightly_coupled_allocation(dest, NULL);
-
- ciMethod* trap_method = method();
- int trap_bci = bci();
- SafePointNode* sfpt = NULL;
- if (alloc != NULL) {
- // The JVM state for uncommon traps between the allocation and
- // arraycopy is set to the state before the allocation: if the
- // initialization is performed by the array copy, we don't want to
- // go back to the interpreter with an unitialized array.
- JVMState* old_jvms = alloc->jvms();
- JVMState* jvms = old_jvms->clone_shallow(C);
- uint size = alloc->req();
- sfpt = new SafePointNode(size, jvms);
- jvms->set_map(sfpt);
- for (uint i = 0; i < size; i++) {
- sfpt->init_req(i, alloc->in(i));
- }
- // re-push array length for deoptimization
- sfpt->ins_req(jvms->stkoff() + jvms->sp(), alloc->in(AllocateNode::ALength));
- jvms->set_sp(jvms->sp()+1);
- jvms->set_monoff(jvms->monoff()+1);
- jvms->set_scloff(jvms->scloff()+1);
- jvms->set_endoff(jvms->endoff()+1);
- jvms->set_should_reexecute(true);
-
- sfpt->set_i_o(map()->i_o());
- sfpt->set_memory(map()->memory());
-
- trap_method = jvms->method();
- trap_bci = jvms->bci();
+ if (!can_emit_guards) {
+ // if saved_jvms == NULL and alloc != NULL, we don't emit any
+ // guards but the arraycopy node could still take advantage of a
+ // tightly coupled allocation. tightly_coupled_allocation() is
+ // called again to make sure it takes the null check above into
+ // account: the null check is mandatory and if it caused an
+ // uncommon trap to be emitted then the allocation can't be
+ // considered tightly coupled in this context.
+ alloc = tightly_coupled_allocation(dest, NULL);
}
bool validated = false;
@@ -4753,7 +4911,7 @@
// Is the type for dest from speculation?
bool dest_spec = false;
- if (!has_src || !has_dest) {
+ if ((!has_src || !has_dest) && can_emit_guards) {
// We don't have sufficient type information, let's see if
// speculative types can help. We need to have types for both src
// and dest so that it pays off.
@@ -4782,7 +4940,7 @@
if (could_have_src && could_have_dest) {
// This is going to pay off so emit the required guards
if (!has_src) {
- src = maybe_cast_profiled_obj(src, src_k, true, sfpt);
+ src = maybe_cast_profiled_obj(src, src_k, true);
src_type = _gvn.type(src);
top_src = src_type->isa_aryptr();
has_src = (top_src != NULL && top_src->klass() != NULL);
@@ -4798,7 +4956,7 @@
}
}
- if (has_src && has_dest) {
+ if (has_src && has_dest && can_emit_guards) {
BasicType src_elem = top_src->klass()->as_array_klass()->element_type()->basic_type();
BasicType dest_elem = top_dest->klass()->as_array_klass()->element_type()->basic_type();
if (src_elem == T_ARRAY) src_elem = T_OBJECT;
@@ -4830,7 +4988,7 @@
if (could_have_src && could_have_dest) {
// If we can have both exact types, emit the missing guards
if (could_have_src && !src_spec) {
- src = maybe_cast_profiled_obj(src, src_k, true, sfpt);
+ src = maybe_cast_profiled_obj(src, src_k, true);
}
if (could_have_dest && !dest_spec) {
dest = maybe_cast_profiled_obj(dest, dest_k, true);
@@ -4839,7 +4997,16 @@
}
}
- if (!C->too_many_traps(trap_method, trap_bci, Deoptimization::Reason_intrinsic) && !src->is_top() && !dest->is_top()) {
+ ciMethod* trap_method = method();
+ int trap_bci = bci();
+ if (saved_jvms != NULL) {
+ trap_method = alloc->jvms()->method();
+ trap_bci = alloc->jvms()->bci();
+ }
+
+ if (!C->too_many_traps(trap_method, trap_bci, Deoptimization::Reason_intrinsic) &&
+ can_emit_guards &&
+ !src->is_top() && !dest->is_top()) {
// validate arguments: enables transformation the ArrayCopyNode
validated = true;
@@ -4875,28 +5042,13 @@
Node* not_subtype_ctrl = gen_subtype_check(src_klass, dest_klass);
if (not_subtype_ctrl != top()) {
- if (sfpt != NULL) {
- GraphKit kit(sfpt->jvms());
- PreserveJVMState pjvms(&kit);
- kit.set_control(not_subtype_ctrl);
- kit.uncommon_trap(Deoptimization::Reason_intrinsic,
- Deoptimization::Action_make_not_entrant);
- assert(kit.stopped(), "Should be stopped");
- } else {
- PreserveJVMState pjvms(this);
- set_control(not_subtype_ctrl);
- uncommon_trap(Deoptimization::Reason_intrinsic,
- Deoptimization::Action_make_not_entrant);
- assert(stopped(), "Should be stopped");
- }
+ PreserveJVMState pjvms(this);
+ set_control(not_subtype_ctrl);
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_make_not_entrant);
+ assert(stopped(), "Should be stopped");
}
- if (sfpt != NULL) {
- GraphKit kit(sfpt->jvms());
- kit.set_control(_gvn.transform(slow_region));
- kit.uncommon_trap(Deoptimization::Reason_intrinsic,
- Deoptimization::Action_make_not_entrant);
- assert(kit.stopped(), "Should be stopped");
- } else {
+ {
PreserveJVMState pjvms(this);
set_control(_gvn.transform(slow_region));
uncommon_trap(Deoptimization::Reason_intrinsic,
@@ -4905,6 +5057,8 @@
}
}
+ arraycopy_move_allocation_here(alloc, dest, saved_jvms, saved_reexecute_sp);
+
if (stopped()) {
return true;
}
@@ -5870,12 +6024,46 @@
Deoptimization::Action_reinterpret);
return true;
}
+
+ // result is a boolean (0 or 1) and its profile (false_cnt & true_cnt)
+ // is the number of occurrences of each value.
+ Node* result = argument(0);
+ if (false_cnt == 0 || true_cnt == 0) {
+ // According to the profile, one value has never been seen.
+ int expected_val = (false_cnt == 0) ? 1 : 0;
+
+ Node* cmp = _gvn.transform(new CmpINode(result, intcon(expected_val)));
+ Node* test = _gvn.transform(new BoolNode(cmp, BoolTest::eq));
+
+ IfNode* check = create_and_map_if(control(), test, PROB_ALWAYS, COUNT_UNKNOWN);
+ Node* fast_path = _gvn.transform(new IfTrueNode(check));
+ Node* slow_path = _gvn.transform(new IfFalseNode(check));
+
+ { // Slow path: uncommon trap for the never-seen value and then reexecute
+ // MethodHandleImpl::profileBoolean() to bump the count, so the JIT knows
+ // the value has been seen at least once.
+ PreserveJVMState pjvms(this);
+ PreserveReexecuteState preexecs(this);
+ jvms()->set_should_reexecute(true);
+
+ set_control(slow_path);
+ set_i_o(i_o());
+
+ uncommon_trap_exact(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_reinterpret);
+ }
+ // The guard for the never-seen value enables sharpening of the result and
+ // returning a constant. It allows branches on the same value to be
+ // eliminated later on.
+ set_control(fast_path);
+ result = intcon(expected_val);
+ }
// Stop profiling.
- // MethodHandleImpl::profileBoolean() has profiling logic in it's bytecode.
- // By replacing method's body with profile data (represented as ProfileBooleanNode
+ // MethodHandleImpl::profileBoolean() has profiling logic in its bytecode.
+ // By replacing the method body with profile data (represented as ProfileBooleanNode
// on IR level) we effectively disable profiling.
// It enables full speed execution once optimized code is generated.
- Node* profile = _gvn.transform(new ProfileBooleanNode(argument(0), false_cnt, true_cnt));
+ Node* profile = _gvn.transform(new ProfileBooleanNode(result, false_cnt, true_cnt));
C->record_for_igvn(profile);
set_result(profile);
return true;
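A condensed, self-contained analogue of what the added guard achieves (plain C++, not the intrinsic itself; uncommon_trap() is a stand-in for the real deoptimization, and the case where neither value was seen is omitted):

#include <cassert>

static void uncommon_trap() { /* deoptimize and reexecute profileBoolean */ }

// Shape of the compiled code when the profile says one boolean value was
// never observed: guard on the unexpected value, then treat the result as a
// known constant on the fast path.
int profile_boolean(int result /* 0 or 1 */, int false_cnt, int true_cnt) {
  if (false_cnt == 0 || true_cnt == 0) {
    int expected_val = (false_cnt == 0) ? 1 : 0;
    if (result != expected_val) {
      uncommon_trap();        // slow path: bump the count in the interpreter
      return result;
    }
    return expected_val;      // fast path: result sharpened to a constant
  }
  return result;              // both values seen: no sharpening possible
}

int main() {
  assert(profile_boolean(1, /*false_cnt=*/0, /*true_cnt=*/42) == 1);
  assert(profile_boolean(0, /*false_cnt=*/7, /*true_cnt=*/42) == 0);
  return 0;
}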
@@ -5888,3 +6076,9 @@
return false;
}
}
+
+bool LibraryCallKit::inline_isCompileConstant() {
+ Node* n = argument(0);
+ set_result(n->is_Con() ? intcon(1) : intcon(0));
+ return true;
+}
--- a/hotspot/src/share/vm/opto/loopTransform.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -38,6 +38,7 @@
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
+#include "opto/vectornode.hpp"
//------------------------------is_loop_exit-----------------------------------
// Given an IfNode, return the loop-exiting projection or NULL if both
@@ -1524,6 +1525,44 @@
}
}
+void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
+ if (SuperWordReductions == false) return;
+
+ CountedLoopNode* loop_head = loop->_head->as_CountedLoop();
+ if (loop_head->unrolled_count() > 1) {
+ return;
+ }
+
+ Node* trip_phi = loop_head->phi();
+ for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
+ Node* phi = loop_head->fast_out(i);
+ if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
+ // For definitions which are defined inside the loop and are not trip counts.
+ Node* def_node = phi->in(LoopNode::LoopBackControl);
+
+ if (def_node != NULL) {
+ Node* n_ctrl = get_ctrl(def_node);
+ if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
+ // Now test it to see if it fits the standard pattern for a reduction operator.
+ int opc = def_node->Opcode();
+ if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
+ if (!def_node->is_reduction()) { // Not marked yet
+ // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
+ for (unsigned j = 1; j < def_node->req(); j++) {
+ Node* in = def_node->in(j);
+ if (in == phi) {
+ def_node->add_flag(Node::Flag_is_reduction);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
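For reference, the shape mark_reductions() is looking for is the classic accumulation loop: a phi on the loop head whose backedge input is an arithmetic node inside the loop that also takes the phi as an operand. A minimal example of such a loop (any accumulating add or multiply fits):

#include <cassert>

// 'sum' becomes a Phi on the CountedLoop head; the AddI computing 'sum + a[i]'
// takes that Phi as an input and feeds its backedge, so it gets marked with
// Node::Flag_is_reduction before unrolling.
int sum_reduction(const int* a, int n) {
  int sum = 0;
  for (int i = 0; i < n; i++) {
    sum = sum + a[i];
  }
  return sum;
}

int main() {
  int a[4] = { 1, 2, 3, 4 };
  assert(sum_reduction(a, 4) == 10);
  return 0;
}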
//------------------------------dominates_backedge---------------------------------
// Returns true if ctrl is executed on every complete iteration
bool IdealLoopTree::dominates_backedge(Node* ctrl) {
@@ -2361,8 +2400,10 @@
// an even number of trips). If we are peeling, we might enable some RCE
// and we'd rather unroll the post-RCE'd loop SO... do not unroll if
// peeling.
- if (should_unroll && !should_peel)
- phase->do_unroll(this,old_new, true);
+ if (should_unroll && !should_peel) {
+ phase->mark_reductions(this);
+ phase->do_unroll(this, old_new, true);
+ }
// Adjust the pre-loop limits to align the main body
// iterations.
--- a/hotspot/src/share/vm/opto/loopnode.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -872,6 +872,9 @@
// Unroll the loop body one step - make each trip do 2 iterations.
void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
+ // Mark vector reduction candidates before loop unrolling
+ void mark_reductions( IdealLoopTree *loop );
+
// Return true if exp is a constant times an induction var
bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
--- a/hotspot/src/share/vm/opto/loopopts.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/loopopts.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -241,8 +241,8 @@
ProjNode* dp_proj = dp->as_Proj();
ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
if (exclude_loop_predicate &&
- (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) ||
- unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check))) {
+ (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
+ unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
// If this is a range check (IfNode::is_range_check), do not
// reorder because Compile::allow_range_check_smearing might have
// changed the check.
--- a/hotspot/src/share/vm/opto/macro.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/macro.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -2535,7 +2535,7 @@
(bol->_test._test == BoolTest::ne), "");
IfNode* ifn = bol->unique_out()->as_If();
assert((ifn->outcnt() == 2) &&
- ifn->proj_out(1)->is_uncommon_trap_proj(Deoptimization::Reason_rtm_state_change), "");
+ ifn->proj_out(1)->is_uncommon_trap_proj(Deoptimization::Reason_rtm_state_change) != NULL, "");
#endif
Node* repl = n->in(1);
if (!_has_locks) {
--- a/hotspot/src/share/vm/opto/multnode.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/multnode.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -150,59 +150,67 @@
}
//-------------------------------is_uncommon_trap_proj----------------------------
-// Return true if proj is the form of "proj->[region->..]call_uct"
-bool ProjNode::is_uncommon_trap_proj(Deoptimization::DeoptReason reason) {
+// Return uncommon trap call node if proj is for "proj->[region->..]call_uct"
+// NULL otherwise
+CallStaticJavaNode* ProjNode::is_uncommon_trap_proj(Deoptimization::DeoptReason reason) {
int path_limit = 10;
Node* out = this;
for (int ct = 0; ct < path_limit; ct++) {
out = out->unique_ctrl_out();
if (out == NULL)
- return false;
+ return NULL;
if (out->is_CallStaticJava()) {
- int req = out->as_CallStaticJava()->uncommon_trap_request();
+ CallStaticJavaNode* call = out->as_CallStaticJava();
+ int req = call->uncommon_trap_request();
if (req != 0) {
Deoptimization::DeoptReason trap_reason = Deoptimization::trap_request_reason(req);
if (trap_reason == reason || reason == Deoptimization::Reason_none) {
- return true;
+ return call;
}
}
- return false; // don't do further after call
+ return NULL; // don't do further after call
}
if (out->Opcode() != Op_Region)
- return false;
+ return NULL;
}
- return false;
+ return NULL;
}
//-------------------------------is_uncommon_trap_if_pattern-------------------------
-// Return true for "if(test)-> proj -> ...
-// |
-// V
-// other_proj->[region->..]call_uct"
-//
+// Return uncommon trap call node for "if(test)-> proj -> ...
+// |
+// V
+// other_proj->[region->..]call_uct"
+// NULL otherwise
// "must_reason_predicate" means the uct reason must be Reason_predicate
-bool ProjNode::is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason) {
+CallStaticJavaNode* ProjNode::is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason) {
Node *in0 = in(0);
- if (!in0->is_If()) return false;
+ if (!in0->is_If()) return NULL;
// Variation of a dead If node.
- if (in0->outcnt() < 2) return false;
+ if (in0->outcnt() < 2) return NULL;
IfNode* iff = in0->as_If();
// we need "If(Conv2B(Opaque1(...)))" pattern for reason_predicate
if (reason != Deoptimization::Reason_none) {
if (iff->in(1)->Opcode() != Op_Conv2B ||
iff->in(1)->in(1)->Opcode() != Op_Opaque1) {
- return false;
+ return NULL;
}
}
ProjNode* other_proj = iff->proj_out(1-_con);
if (other_proj == NULL) // Should never happen, but make Parfait happy.
- return false;
- if (other_proj->is_uncommon_trap_proj(reason)) {
+ return NULL;
+ CallStaticJavaNode* call = other_proj->is_uncommon_trap_proj(reason);
+ if (call != NULL) {
assert(reason == Deoptimization::Reason_none ||
Compile::current()->is_predicate_opaq(iff->in(1)->in(1)), "should be on the list");
- return true;
+ return call;
}
- return false;
+ return NULL;
}
+
+ProjNode* ProjNode::other_if_proj() const {
+ assert(_con == 0 || _con == 1, "not an if?");
+ return in(0)->as_If()->proj_out(1-_con);
+}
--- a/hotspot/src/share/vm/opto/multnode.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/multnode.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -89,13 +89,18 @@
virtual void dump_spec(outputStream *st) const;
#endif
- // Return true if proj is for "proj->[region->..]call_uct"
- bool is_uncommon_trap_proj(Deoptimization::DeoptReason reason);
- // Return true for "if(test)-> proj -> ...
- // |
- // V
- // other_proj->[region->..]call_uct"
- bool is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason);
+ // Return uncommon trap call node if proj is for "proj->[region->..]call_uct"
+ // NULL otherwise
+ CallStaticJavaNode* is_uncommon_trap_proj(Deoptimization::DeoptReason reason);
+ // Return uncommon trap call node for "if(test)-> proj -> ...
+ // |
+ // V
+ // other_proj->[region->..]call_uct"
+ // NULL otherwise
+ CallStaticJavaNode* is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason);
+
+ // Return the other proj node when this is an If proj node
+ ProjNode* other_if_proj() const;
};
#endif // SHARE_VM_OPTO_MULTNODE_HPP
--- a/hotspot/src/share/vm/opto/node.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/node.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -2069,7 +2069,7 @@
//--------------------------unique_ctrl_out------------------------------
// Return the unique control out if only one. Null if none or more than one.
-Node* Node::unique_ctrl_out() {
+Node* Node::unique_ctrl_out() const {
Node* found = NULL;
for (uint i = 0; i < outcnt(); i++) {
Node* use = raw_out(i);
--- a/hotspot/src/share/vm/opto/node.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/node.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -673,7 +673,8 @@
Flag_avoid_back_to_back_before = Flag_may_be_short_branch << 1,
Flag_avoid_back_to_back_after = Flag_avoid_back_to_back_before << 1,
Flag_has_call = Flag_avoid_back_to_back_after << 1,
- Flag_is_expensive = Flag_has_call << 1,
+ Flag_is_reduction = Flag_has_call << 1,
+ Flag_is_expensive = Flag_is_reduction << 1,
_max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
};
@@ -701,6 +702,10 @@
const jushort flags() const { return _flags; }
+ void add_flag(jushort fl) { init_flags(fl); }
+
+ void remove_flag(jushort fl) { clear_flag(fl); }
+
// Return a dense integer opcode number
virtual int Opcode() const;
@@ -852,6 +857,10 @@
// The node is expensive: the best control is set during loop opts
bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; }
+ // An arithmetic node which accumulates data in a loop.
+ // It must have the loop's phi as input and provide a def to the phi.
+ bool is_reduction() const { return (_flags & Flag_is_reduction) != 0; }
+
//----------------- Optimization
// Get the worst-case Type output for this Node.
@@ -931,7 +940,7 @@
Node* find_similar(int opc);
// Return the unique control out if only one. Null if none or more than one.
- Node* unique_ctrl_out();
+ Node* unique_ctrl_out() const;
//----------------- Code Generation
--- a/hotspot/src/share/vm/opto/subnode.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/subnode.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -275,6 +275,8 @@
mask commute( ) const { return mask("032147658"[_test]-'0'); }
mask negate( ) const { return mask(_test^4); }
bool is_canonical( ) const { return (_test == BoolTest::ne || _test == BoolTest::lt || _test == BoolTest::le || _test == BoolTest::overflow); }
+ bool is_less( ) const { return _test == BoolTest::lt || _test == BoolTest::le; }
+ bool is_greater( ) const { return _test == BoolTest::gt || _test == BoolTest::ge; }
void dump_on(outputStream *st) const;
};
--- a/hotspot/src/share/vm/opto/superword.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/superword.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -65,7 +65,8 @@
_lpt(NULL), // loop tree node
_lp(NULL), // LoopNode
_bb(NULL), // basic block
- _iv(NULL) // induction var
+ _iv(NULL), // induction var
+ _race_possible(false) // cases where SDMU is true
{}
//------------------------------transform_loop---------------------------
@@ -145,7 +146,6 @@
void SuperWord::SLP_extract() {
// Ready the block
-
if (!construct_bb())
return; // Exit if no interesting nodes or complex graph.
@@ -449,11 +449,13 @@
int preloop_stride = pre_end->stride_con();
int span = preloop_stride * p.scale_in_bytes();
-
- // Stride one accesses are alignable.
- if (ABS(span) == p.memory_size())
+ int mem_size = p.memory_size();
+ int offset = p.offset_in_bytes();
+ // Stride one accesses are alignable if offset is aligned to memory operation size.
+ // Offset can be unaligned when UseUnalignedAccesses is used.
+ if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
return true;
-
+ }
// If initial offset from start of object is computable,
// compute alignment within the vector.
int vw = vector_width_in_bytes(p.mem());
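The new stride-one condition, pulled out as a standalone sketch with a couple of worked values (names are ad hoc, not SuperWord code):

#include <cassert>
#include <cstdlib>

// A unit-stride access (|span| equals the access size) is only considered
// trivially alignable when its constant byte offset is a multiple of the
// access size; otherwise the vector alignment computation that follows is
// still needed.
static bool stride_one_alignable(int span, int mem_size, int offset) {
  return std::abs(span) == mem_size && (std::abs(offset) % mem_size) == 0;
}

int main() {
  assert( stride_one_alignable(4, 4, 16));   // 4-byte access at offset 16
  assert(!stride_one_alignable(4, 4, 18));   // 4-byte access at offset 18
  assert(!stride_one_alignable(8, 4, 16));   // stride is not one access wide
  return 0;
}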
@@ -463,7 +465,7 @@
if (init_nd->is_Con() && p.invar() == NULL) {
int init = init_nd->bottom_type()->is_int()->get_con();
- int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes();
+ int init_offset = init * p.scale_in_bytes() + offset;
assert(init_offset >= 0, "positive offset from object start");
if (span > 0) {
@@ -640,7 +642,7 @@
}
if (isomorphic(s1, s2)) {
- if (independent(s1, s2)) {
+ if (independent(s1, s2) || reduction(s1, s2)) {
if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
int s1_align = alignment(s1);
@@ -718,6 +720,28 @@
return independent_path(shallow, deep);
}
+//------------------------------reduction---------------------------
+// Is there a data path between s1 and s2 and are both nodes reductions?
+bool SuperWord::reduction(Node* s1, Node* s2) {
+ bool retValue = false;
+ int d1 = depth(s1);
+ int d2 = depth(s2);
+ if (d1 + 1 == d2) {
+ if (s1->is_reduction() && s2->is_reduction()) {
+ // This is an ordered set, so s1 should define s2
+ for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
+ Node* t1 = s1->fast_out(i);
+ if (t1 == s2) {
+ // both nodes are reductions and connected
+ retValue = true;
+ }
+ }
+ }
+ }
+
+ return retValue;
+}
+
//------------------------------independent_path------------------------------
// Helper for independent
bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
@@ -761,6 +785,7 @@
void SuperWord::extend_packlist() {
bool changed;
do {
+ packset_sort(_packset.length());
changed = false;
for (int i = 0; i < _packset.length(); i++) {
Node_List* p = _packset.at(i);
@@ -769,6 +794,13 @@
}
} while (changed);
+ if (_race_possible) {
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ order_def_uses(p);
+ }
+ }
+
#ifndef PRODUCT
if (TraceSuperWord) {
tty->print_cr("\nAfter extend_packlist");
@@ -825,10 +857,12 @@
int align = alignment(s1);
int savings = -1;
+ int num_s1_uses = 0;
Node* u1 = NULL;
Node* u2 = NULL;
for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
Node* t1 = s1->fast_out(i);
+ num_s1_uses++;
if (!in_bb(t1)) continue;
for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
Node* t2 = s2->fast_out(j);
@@ -845,6 +879,9 @@
}
}
}
+ if (num_s1_uses > 1) {
+ _race_possible = true;
+ }
if (savings >= 0) {
Node_List* pair = new Node_List();
pair->push(u1);
@@ -856,9 +893,64 @@
return changed;
}
+//------------------------------order_def_uses---------------------------
+// For extended packsets, ordinally arrange the use packsets by the major component
+void SuperWord::order_def_uses(Node_List* p) {
+ Node* s1 = p->at(0);
+
+ if (s1->is_Store()) return;
+
+ // reductions are always managed beforehand
+ if (s1->is_reduction()) return;
+
+ for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
+ Node* t1 = s1->fast_out(i);
+
+ // Only allow operand swap on commuting operations
+ if (!t1->is_Add() && !t1->is_Mul()) {
+ break;
+ }
+
+ // Now find t1's packset
+ Node_List* p2 = NULL;
+ for (int j = 0; j < _packset.length(); j++) {
+ p2 = _packset.at(j);
+ Node* first = p2->at(0);
+ if (t1 == first) {
+ break;
+ }
+ p2 = NULL;
+ }
+ // Arrange all sub components by the major component
+ if (p2 != NULL) {
+ for (uint j = 1; j < p->size(); j++) {
+ Node* d1 = p->at(j);
+ Node* u1 = p2->at(j);
+ opnd_positions_match(s1, t1, d1, u1);
+ }
+ }
+ }
+}
+
//---------------------------opnd_positions_match-------------------------
// Is the use of d1 in u1 at the same operand position as d2 in u2?
bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
+ // check reductions to see if they are marshalled to represent the reduction
+ // operator in a specified opnd
+ if (u1->is_reduction() && u2->is_reduction()) {
+ // ensure reductions have phis and reduction definitions feeding the 1st operand
+ Node* first = u1->in(2);
+ if (first->is_Phi() || first->is_reduction()) {
+ u1->swap_edges(1, 2);
+ }
+ // ensure reductions have phis and reduction definitions feeding the 1st operand
+ first = u2->in(2);
+ if (first->is_Phi() || first->is_reduction()) {
+ u2->swap_edges(1, 2);
+ }
+ return true;
+ }
+
uint ct = u1->req();
if (ct != u2->req()) return false;
uint i1 = 0;
@@ -940,7 +1032,8 @@
for (int i = 0; i < _packset.length(); i++) {
Node_List* p1 = _packset.at(i);
if (p1 == NULL) continue;
- for (int j = 0; j < _packset.length(); j++) {
+ // Because of sorting we can start at i + 1
+ for (int j = i + 1; j < _packset.length(); j++) {
Node_List* p2 = _packset.at(j);
if (p2 == NULL) continue;
if (i == j) continue;
@@ -1067,8 +1160,19 @@
//------------------------------implemented---------------------------
// Can code be generated for pack p?
bool SuperWord::implemented(Node_List* p) {
+ bool retValue = false;
Node* p0 = p->at(0);
- return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
+ if (p0 != NULL) {
+ int opc = p0->Opcode();
+ uint size = p->size();
+ if (p0->is_reduction()) {
+ const Type *arith_type = p0->bottom_type();
+ retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
+ } else {
+ retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
+ }
+ }
+ return retValue;
}
//------------------------------same_inputs--------------------------
@@ -1102,6 +1206,18 @@
if (!is_vector_use(p0, i))
return false;
}
+ // Check if reductions are connected
+ if (p0->is_reduction()) {
+ Node* second_in = p0->in(2);
+ Node_List* second_pk = my_pack(second_in);
+ if (second_pk == NULL) {
+ // Remove the reduction flag if there is no parent pack; it is not profitable
+ p0->remove_flag(Node::Flag_is_reduction);
+ return false;
+ } else if (second_pk->size() != p->size()) {
+ return false;
+ }
+ }
if (VectorNode::is_shift(p0)) {
// For now, return false if shift count is vector or not scalar promotion
// case (different shift counts) because it is not supported yet.
@@ -1123,6 +1239,9 @@
for (uint k = 0; k < use->req(); k++) {
Node* n = use->in(k);
if (def == n) {
+ // reductions can be loop-carried dependences
+ if (def->is_reduction() && use->is_Phi())
+ continue;
if (!is_vector_use(use, k)) {
return false;
}
@@ -1407,16 +1526,33 @@
vlen_in_bytes = vn->as_StoreVector()->memory_size();
} else if (n->req() == 3) {
// Promote operands to vector
- Node* in1 = vector_opd(p, 1);
+ Node* in1 = NULL;
+ bool node_isa_reduction = n->is_reduction();
+ if (node_isa_reduction) {
+ // the input to the first reduction operation is retained
+ in1 = low_adr->in(1);
+ } else {
+ in1 = vector_opd(p, 1);
+ }
Node* in2 = vector_opd(p, 2);
- if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
+ if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
// Move invariant vector input into second position to avoid register spilling.
Node* tmp = in1;
in1 = in2;
in2 = tmp;
}
- vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
- vlen_in_bytes = vn->as_Vector()->length_in_bytes();
+ if (node_isa_reduction) {
+ const Type *arith_type = n->bottom_type();
+ vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
+ if (in2->is_Load()) {
+ vlen_in_bytes = in2->as_LoadVector()->memory_size();
+ } else {
+ vlen_in_bytes = in2->as_Vector()->length_in_bytes();
+ }
+ } else {
+ vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
+ vlen_in_bytes = vn->as_Vector()->length_in_bytes();
+ }
} else {
ShouldNotReachHere();
}
@@ -1556,6 +1692,8 @@
_n_idx_list.pop();
Node* def = use->in(idx);
+ if (def->is_reduction()) continue;
+
// Insert extract operation
_igvn.hash_delete(def);
int def_pos = alignment(def) / data_size(def);
@@ -1576,6 +1714,7 @@
bool SuperWord::is_vector_use(Node* use, int u_idx) {
Node_List* u_pk = my_pack(use);
if (u_pk == NULL) return false;
+ if (use->is_reduction()) return true;
Node* def = use->in(u_idx);
Node_List* d_pk = my_pack(def);
if (d_pk == NULL) {
@@ -1613,7 +1752,7 @@
// by the visited and post_visited sets,
// and count number of nodes in block.
int bb_ct = 0;
- for (uint i = 0; i < lpt()->_body.size(); i++ ) {
+ for (uint i = 0; i < lpt()->_body.size(); i++) {
Node *n = lpt()->_body.at(i);
set_bb_idx(n, i); // Create a temporary map
if (in_bb(n)) {
@@ -1674,6 +1813,7 @@
// Do a depth first walk over out edges
int rpo_idx = bb_ct - 1;
int size;
+ int reduction_uses = 0;
while ((size = _stk.length()) > 0) {
Node* n = _stk.top(); // Leave node on stack
if (!visited_test_set(n)) {
@@ -1685,6 +1825,14 @@
if (in_bb(use) && !visited_test(use) &&
// Don't go around backedge
(!use->is_Phi() || n == entry)) {
+ if (use->is_reduction()) {
+ // First see if we can map the reduction on the platform we are on, then
+ // count each reduction use we find.
+ BasicType bt = use->bottom_type()->basic_type();
+ if (ReductionNode::implemented(use->Opcode(), Matcher::min_vector_size(bt), bt)) {
+ reduction_uses++;
+ }
+ }
_stk.push(use);
}
}
@@ -1708,7 +1856,8 @@
set_bb_idx(n, j);
}
- initialize_bb(); // Ensure extra info is allocated.
+ // Ensure extra info is allocated.
+ initialize_bb();
#ifndef PRODUCT
if (TraceSuperWord) {
@@ -1726,7 +1875,7 @@
}
#endif
assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
- return (_mem_slice_head.length() > 0) || (_data_entry.length() > 0);
+ return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0);
}
//------------------------------initialize_bb---------------------------
@@ -1959,6 +2108,27 @@
_packset.remove_at(pos);
}
+void SuperWord::packset_sort(int n) {
+ // simple bubble sort so that we get O(n) behavior when the packset is already sorted
+ while (n != 0) {
+ bool swapped = false;
+ for (int i = 1; i < n; i++) {
+ Node_List* q_low = _packset.at(i-1);
+ Node_List* q_i = _packset.at(i);
+
+ // only swap when we find something to swap
+ if (alignment(q_low->at(0)) > alignment(q_i->at(0))) {
+ *(_packset.adr_at(i)) = q_low;
+ *(_packset.adr_at(i-1)) = q_i;
+ swapped = true;
+ }
+ }
+ if (swapped == false) break;
+ n--;
+ }
+}
+
//------------------------------executed_first---------------------------
// Return the node executed first in pack p. Uses the RPO block list
// to determine order.
--- a/hotspot/src/share/vm/opto/superword.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/superword.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -249,6 +249,7 @@
LoopNode* _lp; // Current LoopNode
Node* _bb; // Current basic block
PhiNode* _iv; // Induction var
+ bool _race_possible; // In cases where SDMU (a single def with multiple uses) is true
// Accessors
Arena* arena() { return _arena; }
@@ -337,6 +338,8 @@
bool isomorphic(Node* s1, Node* s2);
// Is there no data path from s1 to s2 or s2 to s1?
bool independent(Node* s1, Node* s2);
+ // Is there a data path between s1 and s2, and are both nodes reductions?
+ bool reduction(Node* s1, Node* s2);
// Helper for independent
bool independent_path(Node* shallow, Node* deep, uint dp=0);
void set_alignment(Node* s1, Node* s2, int align);
@@ -347,6 +350,8 @@
bool follow_use_defs(Node_List* p);
// Extend the packset by visiting uses of nodes in pack p
bool follow_def_uses(Node_List* p);
+ // For extended packsets, order the use packsets by their major (first) component
+ void order_def_uses(Node_List* p);
// Estimate the savings from executing s1 and s2 as a pack
int est_savings(Node* s1, Node* s2);
int adjacent_profit(Node* s1, Node* s2);
@@ -419,9 +424,12 @@
void print_bb();
void print_stmt(Node* s);
char* blank(uint depth);
+
+ void packset_sort(int n);
};
+
//------------------------------SWPointer---------------------------
// Information about an address for dependence checking and vector alignment
class SWPointer VALUE_OBJ_CLASS_SPEC {
--- a/hotspot/src/share/vm/opto/type.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/type.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -4083,7 +4083,9 @@
(tap->_klass_is_exact && !tap->klass()->is_subtype_of(klass())) ||
// 'this' is exact and super or unrelated:
(this->_klass_is_exact && !klass()->is_subtype_of(tap->klass())))) {
- tary = TypeAry::make(Type::BOTTOM, tary->_size, tary->_stable);
+ if (above_centerline(ptr)) {
+ tary = TypeAry::make(Type::BOTTOM, tary->_size, tary->_stable);
+ }
return make(NotNull, NULL, tary, lazy_klass, false, off, InstanceBot, speculative, depth);
}
--- a/hotspot/src/share/vm/opto/vectornode.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/vectornode.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -250,7 +250,6 @@
int vopc = VectorNode::opcode(opc, bt);
// This method should not be called for unimplemented vectors.
guarantee(vopc > 0, err_msg_res("Vector for '%s' is not implemented", NodeClassNames[opc]));
-
switch (vopc) {
case Op_AddVB: return new AddVBNode(n1, n2, vt);
case Op_AddVS: return new AddVSNode(n1, n2, vt);
@@ -441,3 +440,72 @@
return NULL;
}
+int ReductionNode::opcode(int opc, BasicType bt) {
+ int vopc = opc;
+ switch (opc) {
+ case Op_AddI:
+ assert(bt == T_INT, "must be");
+ vopc = Op_AddReductionVI;
+ break;
+ case Op_AddL:
+ assert(bt == T_LONG, "must be");
+ vopc = Op_AddReductionVL;
+ break;
+ case Op_AddF:
+ assert(bt == T_FLOAT, "must be");
+ vopc = Op_AddReductionVF;
+ break;
+ case Op_AddD:
+ assert(bt == T_DOUBLE, "must be");
+ vopc = Op_AddReductionVD;
+ break;
+ case Op_MulI:
+ assert(bt == T_INT, "must be");
+ vopc = Op_MulReductionVI;
+ break;
+ case Op_MulF:
+ assert(bt == T_FLOAT, "must be");
+ vopc = Op_MulReductionVF;
+ break;
+ case Op_MulD:
+ assert(bt == T_DOUBLE, "must be");
+ vopc = Op_MulReductionVD;
+ break;
+ // TODO: add MulL for targets that support it
+ default:
+ break;
+ }
+ return vopc;
+}
+
+// Return the appropriate reduction node.
+ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, BasicType bt) {
+
+ int vopc = opcode(opc, bt);
+
+ // This method should not be called for unimplemented vectors.
+ guarantee(vopc != opc, err_msg_res("Vector for '%s' is not implemented", NodeClassNames[opc]));
+
+ switch (vopc) {
+ case Op_AddReductionVI: return new AddReductionVINode(ctrl, n1, n2);
+ case Op_AddReductionVL: return new AddReductionVLNode(ctrl, n1, n2);
+ case Op_AddReductionVF: return new AddReductionVFNode(ctrl, n1, n2);
+ case Op_AddReductionVD: return new AddReductionVDNode(ctrl, n1, n2);
+ case Op_MulReductionVI: return new MulReductionVINode(ctrl, n1, n2);
+ case Op_MulReductionVF: return new MulReductionVFNode(ctrl, n1, n2);
+ case Op_MulReductionVD: return new MulReductionVDNode(ctrl, n1, n2);
+ }
+ fatal(err_msg_res("Missed vector creation for '%s'", NodeClassNames[vopc]));
+ return NULL;
+}
+
+bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
+ if (is_java_primitive(bt) &&
+ (vlen > 1) && is_power_of_2(vlen) &&
+ Matcher::vector_size_supported(bt, vlen)) {
+ int vopc = ReductionNode::opcode(opc, bt);
+ return vopc != opc && Matcher::match_rule_supported(vopc);
+ }
+ return false;
+}
+
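For illustration, the ReductionNode mapping above (AddI to AddReductionVI, MulI to MulReductionVI, and so on) targets loop-carried scalar reductions. The sketch below shows the kind of loop involved; the class and method names are illustrative only, and -XX:+SuperWordReductions is the flag exercised by the new jtreg tests later in this changeset.

    // Illustrative only: a loop-carried int add-reduction of the shape the
    // SuperWord changes above can now vectorize (AddI feeding AddReductionVI).
    public class SumReductionSketch {
        static int sumReduction(int[] a, int[] b) {
            int total = 0;                      // loop-carried scalar (the reduction)
            for (int i = 0; i < a.length; i++) {
                total += a[i] * b[i];           // element-wise work feeding the reduction
            }
            return total;
        }

        public static void main(String[] args) {
            int[] a = new int[1024];
            int[] b = new int[1024];
            for (int i = 0; i < a.length; i++) { a[i] = i; b[i] = 2; }
            // Warm up so the loop reaches C2, e.g. with
            // -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions
            long result = 0;
            for (int j = 0; j < 20_000; j++) {
                result += sumReduction(a, b);
            }
            System.out.println(result);
        }
    }
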
--- a/hotspot/src/share/vm/opto/vectornode.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/opto/vectornode.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -90,6 +90,37 @@
virtual int Opcode() const;
};
+//------------------------------ReductionNode------------------------------------
+// Perform reduction of a vector
+class ReductionNode : public Node {
+ public:
+ ReductionNode(Node *ctrl, Node* in1, Node* in2) : Node(ctrl, in1, in2) {}
+
+ static ReductionNode* make(int opc, Node *ctrl, Node* in1, Node* in2, BasicType bt);
+ static int opcode(int opc, BasicType bt);
+ static bool implemented(int opc, uint vlen, BasicType bt);
+};
+
+//------------------------------AddReductionVINode--------------------------------------
+// Vector add int as a reduction
+class AddReductionVINode : public ReductionNode {
+public:
+ AddReductionVINode(Node * ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------AddReductionVLNode--------------------------------------
+// Vector add long as a reduction
+class AddReductionVLNode : public ReductionNode {
+public:
+ AddReductionVLNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
//------------------------------AddVLNode--------------------------------------
// Vector add long
class AddVLNode : public VectorNode {
@@ -106,6 +137,16 @@
virtual int Opcode() const;
};
+//------------------------------AddReductionVFNode--------------------------------------
+// Vector add float as a reduction
+class AddReductionVFNode : public ReductionNode {
+public:
+ AddReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
//------------------------------AddVDNode--------------------------------------
// Vector add double
class AddVDNode : public VectorNode {
@@ -114,6 +155,16 @@
virtual int Opcode() const;
};
+//------------------------------AddReductionVDNode--------------------------------------
+// Vector add double as a reduction
+class AddReductionVDNode : public ReductionNode {
+public:
+ AddReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
//------------------------------SubVBNode--------------------------------------
// Vector subtract byte
class SubVBNode : public VectorNode {
@@ -178,6 +229,16 @@
virtual int Opcode() const;
};
+//------------------------------MulReductionVINode--------------------------------------
+// Vector multiply int as a reduction
+class MulReductionVINode : public ReductionNode {
+public:
+ MulReductionVINode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
//------------------------------MulVFNode--------------------------------------
// Vector multiply float
class MulVFNode : public VectorNode {
@@ -186,6 +247,16 @@
virtual int Opcode() const;
};
+//------------------------------MulReductionVFNode--------------------------------------
+// Vector multiply float as a reduction
+class MulReductionVFNode : public ReductionNode {
+public:
+ MulReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
//------------------------------MulVDNode--------------------------------------
// Vector multiply double
class MulVDNode : public VectorNode {
@@ -194,6 +265,16 @@
virtual int Opcode() const;
};
+//------------------------------MulReductionVDNode--------------------------------------
+// Vector multiply double as a reduction
+class MulReductionVDNode : public ReductionNode {
+public:
+ MulReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
//------------------------------DivVFNode--------------------------------------
// Vector divide float
class DivVFNode : public VectorNode {
--- a/hotspot/src/share/vm/prims/unsafe.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/prims/unsafe.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -324,6 +324,24 @@
#endif // not SUPPORTS_NATIVE_CX8
+UNSAFE_ENTRY(jboolean, Unsafe_isBigEndian0(JNIEnv *env, jobject unsafe))
+ UnsafeWrapper("Unsafe_IsBigEndian0");
+ {
+#ifdef VM_LITTLE_ENDIAN
+ return false;
+#else
+ return true;
+#endif
+ }
+UNSAFE_END
+
+UNSAFE_ENTRY(jint, Unsafe_unalignedAccess0(JNIEnv *env, jobject unsafe))
+ UnsafeWrapper("Unsafe_UnalignedAccess0");
+ {
+ return UseUnalignedAccesses;
+ }
+UNSAFE_END
+
#define DEFINE_GETSETOOP(jboolean, Boolean) \
\
UNSAFE_ENTRY(jboolean, Unsafe_Get##Boolean##140(JNIEnv *env, jobject unsafe, jobject obj, jint offset)) \
@@ -1261,6 +1279,9 @@
{CC"loadFence", CC"()V", FN_PTR(Unsafe_LoadFence)},
{CC"storeFence", CC"()V", FN_PTR(Unsafe_StoreFence)},
{CC"fullFence", CC"()V", FN_PTR(Unsafe_FullFence)},
+
+ {CC"isBigEndian0", CC"()Z", FN_PTR(Unsafe_isBigEndian0)},
+ {CC"unalignedAccess0", CC"()Z", FN_PTR(Unsafe_unalignedAccess0)}
};
#undef CC
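For illustration, Unsafe_isBigEndian0 above reports the platform byte order (false when VM_LITTLE_ENDIAN is defined) and Unsafe_unalignedAccess0 reports the new UseUnalignedAccesses diagnostic flag. A minimal, portable Java sketch of the byte-order check, using only public java.nio API rather than the new Unsafe natives:

    import java.nio.ByteOrder;

    public class EndiannessProbe {
        public static void main(String[] args) {
            // Equivalent to what Unsafe_isBigEndian0 returns:
            // true on big-endian platforms, false on little-endian ones.
            boolean bigEndian = ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN;
            System.out.println("big endian: " + bigEndian);
        }
    }
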
--- a/hotspot/src/share/vm/prims/whitebox.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/prims/whitebox.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -819,46 +819,9 @@
mo.notify_all();
WB_END
-void WhiteBox::sweeper_thread_entry(JavaThread* thread, TRAPS) {
- guarantee(WhiteBoxAPI, "internal testing API :: WhiteBox has to be enabled");
- {
- MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
- NMethodSweeper::_should_sweep = true;
- }
- NMethodSweeper::possibly_sweep();
-}
-
-JavaThread* WhiteBox::create_sweeper_thread(TRAPS) {
- // create sweeper thread w/ custom entry -- one iteration instead of loop
- CodeCacheSweeperThread* sweeper_thread = new CodeCacheSweeperThread();
- sweeper_thread->set_entry_point(&WhiteBox::sweeper_thread_entry);
-
- // create j.l.Thread object and associate it w/ sweeper thread
- {
- // inherit deamon property from current thread
- bool is_daemon = java_lang_Thread::is_daemon(JavaThread::current()->threadObj());
-
- HandleMark hm(THREAD);
- Handle thread_group(THREAD, Universe::system_thread_group());
- const char* name = "WB Sweeper thread";
- sweeper_thread->allocate_threadObj(thread_group, name, is_daemon, THREAD);
- }
-
- {
- MutexLocker mu(Threads_lock, THREAD);
- Threads::add(sweeper_thread);
- }
- return sweeper_thread;
-}
-
-WB_ENTRY(jobject, WB_ForceNMethodSweep(JNIEnv* env, jobject o))
- JavaThread* sweeper_thread = WhiteBox::create_sweeper_thread(Thread::current());
- if (sweeper_thread == NULL) {
- return NULL;
- }
- jobject result = JNIHandles::make_local(env, sweeper_thread->threadObj());
- Thread::start(sweeper_thread);
- return result;
+WB_ENTRY(void, WB_ForceNMethodSweep(JNIEnv* env, jobject o))
+ // Force a code cache sweep and block until it has finished
+ NMethodSweeper::force_sweep();
WB_END
WB_ENTRY(jboolean, WB_IsInStringTable(JNIEnv* env, jobject o, jstring javaString))
@@ -1402,7 +1365,7 @@
{CC"getCPUFeatures", CC"()Ljava/lang/String;", (void*)&WB_GetCPUFeatures },
{CC"getNMethod", CC"(Ljava/lang/reflect/Executable;Z)[Ljava/lang/Object;",
(void*)&WB_GetNMethod },
- {CC"forceNMethodSweep0", CC"()Ljava/lang/Thread;", (void*)&WB_ForceNMethodSweep },
+ {CC"forceNMethodSweep", CC"()V", (void*)&WB_ForceNMethodSweep },
{CC"allocateCodeBlob", CC"(II)J", (void*)&WB_AllocateCodeBlob },
{CC"freeCodeBlob", CC"(J)V", (void*)&WB_FreeCodeBlob },
{CC"getCodeHeapEntries", CC"(I)[Ljava/lang/Object;",(void*)&WB_GetCodeHeapEntries },
--- a/hotspot/src/share/vm/prims/whitebox.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/prims/whitebox.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -70,8 +70,6 @@
Symbol* signature_symbol);
static const char* lookup_jstring(const char* field_name, oop object);
static bool lookup_bool(const char* field_name, oop object);
- static void sweeper_thread_entry(JavaThread* thread, TRAPS);
- static JavaThread* create_sweeper_thread(TRAPS);
static int get_blob_type(const CodeBlob* code);
static CodeHeap* get_code_heap(int blob_type);
static CodeBlob* allocate_code_blob(int size, int blob_type);
--- a/hotspot/src/share/vm/runtime/arguments.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -92,6 +92,8 @@
bool Arguments::_UseOnStackReplacement = UseOnStackReplacement;
bool Arguments::_BackgroundCompilation = BackgroundCompilation;
bool Arguments::_ClipInlining = ClipInlining;
+intx Arguments::_Tier3InvokeNotifyFreqLog = Tier3InvokeNotifyFreqLog;
+intx Arguments::_Tier4InvocationThreshold = Tier4InvocationThreshold;
char* Arguments::SharedArchivePath = NULL;
@@ -1069,6 +1071,14 @@
AlwaysCompileLoopMethods = Arguments::_AlwaysCompileLoopMethods;
UseOnStackReplacement = Arguments::_UseOnStackReplacement;
BackgroundCompilation = Arguments::_BackgroundCompilation;
+ if (TieredCompilation) {
+ if (FLAG_IS_DEFAULT(Tier3InvokeNotifyFreqLog)) {
+ Tier3InvokeNotifyFreqLog = Arguments::_Tier3InvokeNotifyFreqLog;
+ }
+ if (FLAG_IS_DEFAULT(Tier4InvocationThreshold)) {
+ Tier4InvocationThreshold = Arguments::_Tier4InvocationThreshold;
+ }
+ }
// Change from defaults based on mode
switch (mode) {
@@ -2589,6 +2599,10 @@
Arguments::_UseOnStackReplacement = UseOnStackReplacement;
Arguments::_ClipInlining = ClipInlining;
Arguments::_BackgroundCompilation = BackgroundCompilation;
+ if (TieredCompilation) {
+ Arguments::_Tier3InvokeNotifyFreqLog = Tier3InvokeNotifyFreqLog;
+ Arguments::_Tier4InvocationThreshold = Tier4InvocationThreshold;
+ }
// Setup flags for mixed which is the default
set_mode_flags(_mixed);
--- a/hotspot/src/share/vm/runtime/arguments.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -323,6 +323,8 @@
static bool _BackgroundCompilation;
static bool _ClipInlining;
static bool _CIDynamicCompilePriority;
+ static intx _Tier3InvokeNotifyFreqLog;
+ static intx _Tier4InvocationThreshold;
// Tiered
static void set_tiered_flags();
--- a/hotspot/src/share/vm/runtime/deoptimization.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/deoptimization.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1861,6 +1861,7 @@
"speculate_null_check",
"rtm_state_change",
"unstable_if",
+ "unstable_fused_if",
"tenured"
};
const char* Deoptimization::_trap_action_name[] = {
--- a/hotspot/src/share/vm/runtime/deoptimization.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/deoptimization.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -63,6 +63,7 @@
Reason_speculate_null_check, // saw unexpected null from type speculation
Reason_rtm_state_change, // rtm state change detected
Reason_unstable_if, // a branch predicted always false was taken
+ Reason_unstable_fused_if, // fused two ifs that had each one untaken branch. One is now taken.
// Reason_tenured is counted separately, add normal counted Reasons above.
// Related to MethodData::_trap_hist_limit where Reason_tenured isn't included
@@ -326,6 +327,8 @@
return Reason_null_check;
else if (reason == Reason_unstable_if)
return Reason_intrinsic;
+ else if (reason == Reason_unstable_fused_if)
+ return Reason_range_check;
else
return Reason_none;
}
--- a/hotspot/src/share/vm/runtime/globals.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -3912,7 +3912,10 @@
"Enable event-based tracing") \
\
product(bool, UseLockedTracing, false, \
- "Use locked-tracing when doing event-based tracing")
+ "Use locked-tracing when doing event-based tracing") \
+ \
+ diagnostic(bool, UseUnalignedAccesses, false, \
+ "Use unaligned memory accesses in sun.misc.Unsafe")
/*
* Macros for factoring of globals
--- a/hotspot/src/share/vm/runtime/sweeper.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/sweeper.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -144,6 +144,7 @@
int NMethodSweeper::_seen = 0; // Nof. nmethod we have currently processed in current pass of CodeCache
volatile bool NMethodSweeper::_should_sweep = true; // Indicates if we should invoke the sweeper
+volatile bool NMethodSweeper::_force_sweep = false;// Indicates if we should force a sweep
volatile int NMethodSweeper::_bytes_changed = 0; // Counts the total nmethod size if the nmethod changed from:
// 1) alive -> not_entrant
// 2) not_entrant -> zombie
@@ -276,6 +277,23 @@
}
/**
+ * Wakes up the sweeper thread and forces a sweep. Blocks until it has finished.
+ */
+void NMethodSweeper::force_sweep() {
+ ThreadBlockInVM tbivm(JavaThread::current());
+ MutexLockerEx waiter(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+ // Request forced sweep
+ _force_sweep = true;
+ while (_force_sweep) {
+ // Notify sweeper that we want to force a sweep and wait for completion.
+ // If a sweep is currently taking place, we time out and try again because
+ // we want to enforce a full sweep.
+ CodeCache_lock->notify();
+ CodeCache_lock->wait(Mutex::_no_safepoint_check_flag, 1000);
+ }
+}
+
+/**
* Handle a safepoint request
*/
void NMethodSweeper::handle_safepoint_request() {
@@ -335,6 +353,9 @@
}
}
+ // Remember if this was a forced sweep
+ bool forced = _force_sweep;
+
// Force stack scanning if there is only 10% free space in the code cache.
// We force stack scanning only if the non-profiled code heap gets full, since critical
// allocations go to the non-profiled heap and we must make sure that there is
@@ -344,7 +365,7 @@
do_stack_scanning();
}
- if (_should_sweep) {
+ if (_should_sweep || forced) {
init_sweeper_log();
sweep_code_cache();
}
@@ -356,12 +377,20 @@
_should_sweep = false;
// If there was enough state change, 'possibly_enable_sweeper()'
// sets '_should_sweep' to true
- possibly_enable_sweeper();
+ possibly_enable_sweeper();
// Reset _bytes_changed only if there was enough state change. _bytes_changed
// can further increase by calls to 'report_state_change'.
if (_should_sweep) {
_bytes_changed = 0;
}
+
+ if (forced) {
+ // Notify requester that forced sweep finished
+ assert(_force_sweep, "Should be a forced sweep");
+ MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+ _force_sweep = false;
+ CodeCache_lock->notify();
+ }
}
void NMethodSweeper::sweep_code_cache() {
--- a/hotspot/src/share/vm/runtime/sweeper.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/sweeper.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -54,7 +54,6 @@
// nmethod's space is freed.
class NMethodSweeper : public AllStatic {
- friend class WhiteBox;
private:
enum MethodStateChange {
None,
@@ -71,6 +70,7 @@
static volatile int _sweep_started; // Flag to control conc sweeper
static volatile bool _should_sweep; // Indicates if we should invoke the sweeper
+ static volatile bool _force_sweep; // Indicates if we should force a sweep
static volatile int _bytes_changed; // Counts the total nmethod size if the nmethod changed from:
// 1) alive -> not_entrant
// 2) not_entrant -> zombie
@@ -117,6 +117,7 @@
static void mark_active_nmethods(); // Invoked at the end of each safepoint
static void sweeper_loop();
static void notify(int code_blob_type); // Possibly start the sweeper thread.
+ static void force_sweep();
static int hotness_counter_reset_val();
static void report_state_change(nmethod* nm);
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Fri Apr 03 11:41:01 2015 -0700
@@ -1985,13 +1985,18 @@
declare_c2_type(PowDNode, Node) \
declare_c2_type(ReverseBytesINode, Node) \
declare_c2_type(ReverseBytesLNode, Node) \
+ declare_c2_type(ReductionNode, Node) \
declare_c2_type(VectorNode, Node) \
declare_c2_type(AddVBNode, VectorNode) \
declare_c2_type(AddVSNode, VectorNode) \
declare_c2_type(AddVINode, VectorNode) \
+ declare_c2_type(AddReductionVINode, ReductionNode) \
declare_c2_type(AddVLNode, VectorNode) \
+ declare_c2_type(AddReductionVLNode, ReductionNode) \
declare_c2_type(AddVFNode, VectorNode) \
+ declare_c2_type(AddReductionVFNode, ReductionNode) \
declare_c2_type(AddVDNode, VectorNode) \
+ declare_c2_type(AddReductionVDNode, ReductionNode) \
declare_c2_type(SubVBNode, VectorNode) \
declare_c2_type(SubVSNode, VectorNode) \
declare_c2_type(SubVINode, VectorNode) \
@@ -2000,8 +2005,11 @@
declare_c2_type(SubVDNode, VectorNode) \
declare_c2_type(MulVSNode, VectorNode) \
declare_c2_type(MulVINode, VectorNode) \
+ declare_c2_type(MulReductionVINode, ReductionNode) \
declare_c2_type(MulVFNode, VectorNode) \
+ declare_c2_type(MulReductionVFNode, ReductionNode) \
declare_c2_type(MulVDNode, VectorNode) \
+ declare_c2_type(MulReductionVDNode, ReductionNode) \
declare_c2_type(DivVFNode, VectorNode) \
declare_c2_type(DivVDNode, VectorNode) \
declare_c2_type(LShiftVBNode, VectorNode) \
@@ -2516,6 +2524,7 @@
declare_constant(Deoptimization::Reason_speculate_null_check) \
declare_constant(Deoptimization::Reason_rtm_state_change) \
declare_constant(Deoptimization::Reason_unstable_if) \
+ declare_constant(Deoptimization::Reason_unstable_fused_if) \
declare_constant(Deoptimization::Reason_tenured) \
declare_constant(Deoptimization::Reason_LIMIT) \
declare_constant(Deoptimization::Reason_RECORDED_LIMIT) \
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp Fri Apr 03 11:41:01 2015 -0700
@@ -1345,6 +1345,13 @@
return (intptr_t) p;
}
+// swap a & b
+template<class T> static void swap(T& a, T& b) {
+ T tmp = a;
+ a = b;
+ b = tmp;
+}
+
// Printf-style formatters for fixed- and variable-width types as pointers and
// integers. These are derived from the definitions in inttypes.h. If the platform
// doesn't provide appropriate definitions, they should be provided in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/arraycopy/TestArrayCopyBadReexec.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8073866
+ * @summary Fix for 8064703 may also cause stores between the allocation and arraycopy to be re-executed after a deoptimization
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestArrayCopyBadReexec
+ *
+ */
+
+public class TestArrayCopyBadReexec {
+
+ static int val;
+
+ static int[] m1(int[] src, int l) {
+ if (src == null) {
+ return null;
+ }
+ int[] dest = new int[10];
+ val++;
+ try {
+ System.arraycopy(src, 0, dest, 0, l);
+ } catch (IndexOutOfBoundsException npe) {
+ }
+ return dest;
+ }
+
+ static public void main(String[] args) {
+ int[] src = new int[10];
+ int[] res = null;
+ boolean success = true;
+
+ for (int i = 0; i < 20000; i++) {
+ m1(src, 10);
+ }
+
+ int val_before = val;
+
+ m1(src, -1);
+
+ if (val - val_before != 1) {
+ System.out.println("Bad increment: " + (val - val_before));
+ throw new RuntimeException("Test failed");
+ }
+ }
+}
--- a/hotspot/test/compiler/arraycopy/TestArrayCopyNoInit.java Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/test/compiler/arraycopy/TestArrayCopyNoInit.java Fri Apr 03 11:41:01 2015 -0700
@@ -76,7 +76,7 @@
static TestArrayCopyNoInit[] m5(Object[] src) {
Object tmp = src[0];
TestArrayCopyNoInit[] dest = new TestArrayCopyNoInit[10];
- System.arraycopy(src, 0, dest, 0, 0);
+ System.arraycopy(src, 0, dest, 0, 10);
return dest;
}
@@ -110,7 +110,7 @@
static H[] m6(Object[] src) {
Object tmp = src[0];
H[] dest = new H[10];
- System.arraycopy(src, 0, dest, 0, 0);
+ System.arraycopy(src, 0, dest, 0, 10);
return dest;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/arraycopy/TestArrayCopyStoppedAfterGuards.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8075921
+ * @summary control becomes top after arraycopy guards and confuses tightly coupled allocation logic
+ * @run main/othervm -Xcomp -XX:CompileOnly=TestArrayCopyStoppedAfterGuards.test,System.arraycopy TestArrayCopyStoppedAfterGuards
+ *
+ */
+
+public class TestArrayCopyStoppedAfterGuards {
+
+ static void test() {
+ Object src = new Object();
+ int[] dst = new int[10];
+ System.arraycopy(src, 0, dst, 0, 10);
+ }
+
+ static public void main(String[] args) {
+ // warmup
+ Object o = new Object();
+ int[] src = new int[10];
+ int[] dst = new int[10];
+ System.arraycopy(src, 0, dst, 0, 10);
+
+ try {
+ test();
+ } catch(ArrayStoreException ase) {}
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/inlining/DefaultMethodsDependencies.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8069263
+ * @summary Deoptimization between array allocation and arraycopy may result in non initialized array
+ * @run main/othervm -XX:-BackgroundCompilation -XX:CompileOnly=DefaultMethodsDependencies::test -XX:CompileOnly=DefaultMethodsDependencies$I2::m1 DefaultMethodsDependencies
+ *
+ */
+
+public class DefaultMethodsDependencies {
+
+ interface I1 {
+ void m1();
+ // triggers processing of default methods in C1
+ default void m2() {
+ }
+ }
+
+ interface I2 extends I1 {
+ // added to C2 as default method
+ default void m1() {
+ }
+ }
+
+ static abstract class C1 implements I1 {
+ }
+
+ static class C2 extends C1 implements I2 {
+ }
+
+ static void test(C1 obj) {
+ obj.m1();
+ }
+
+ static public void main(String[] args) {
+ C2 obj = new C2();
+ for (int i = 0; i < 20000; i++) {
+ test(obj);
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/unsafe/HeapByteBufferTest.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,372 @@
+//
+// Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2015, Red Hat Inc. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+
+import static java.lang.Math.abs;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import static java.nio.ByteOrder.BIG_ENDIAN;
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+import java.util.SplittableRandom;
+import java.util.Arrays;
+
+/**
+ * @test
+ * @bug 8026049
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-UseUnalignedAccesses HeapByteBufferTest
+ * @run main/othervm HeapByteBufferTest
+ * @summary Verify that byte buffers are correctly accessed.
+ */
+
+// A wrapper for a ByteBuffer which maintains a backing array and a
+// position. Whenever this wrapper is written the backing array and
+// the wrapped byte buffer are updated together, and whenever it is
+// read we check that the ByteBuffer and the backing array are identical.
+
+class MyByteBuffer {
+ final ByteBuffer buf;
+ final byte[] bytes;
+ int pos;
+ ByteOrder byteOrder = BIG_ENDIAN;
+
+ MyByteBuffer(ByteBuffer buf, byte[] bytes) {
+ this.buf = buf;
+ this.bytes = Arrays.copyOf(bytes, bytes.length);
+ pos = 0;
+ }
+
+ public final MyByteBuffer order(ByteOrder bo) {
+ byteOrder = bo;
+ buf.order(bo);
+ return this;
+ }
+
+ static MyByteBuffer wrap(byte[] bytes) {
+ return new MyByteBuffer(ByteBuffer.wrap(bytes), bytes);
+ }
+
+ int capacity() { return bytes.length; }
+ int position() {
+ if (buf.position() != pos)
+ throw new RuntimeException();
+ return buf.position();
+ }
+
+ byte[] array() { return buf.array(); }
+ byte[] backingArray() { return bytes; }
+
+ private static byte long7(long x) { return (byte)(x >> 56); }
+ private static byte long6(long x) { return (byte)(x >> 48); }
+ private static byte long5(long x) { return (byte)(x >> 40); }
+ private static byte long4(long x) { return (byte)(x >> 32); }
+ private static byte long3(long x) { return (byte)(x >> 24); }
+ private static byte long2(long x) { return (byte)(x >> 16); }
+ private static byte long1(long x) { return (byte)(x >> 8); }
+ private static byte long0(long x) { return (byte)(x ); }
+
+ private static byte int3(int x) { return (byte)(x >> 24); }
+ private static byte int2(int x) { return (byte)(x >> 16); }
+ private static byte int1(int x) { return (byte)(x >> 8); }
+ private static byte int0(int x) { return (byte)(x ); }
+
+ private static byte short1(short x) { return (byte)(x >> 8); }
+ private static byte short0(short x) { return (byte)(x ); }
+
+ byte _get(long i) { return bytes[(int)i]; }
+ void _put(long i, byte x) { bytes[(int)i] = x; }
+
+ private void putLongX(long a, long x) {
+ if (byteOrder == BIG_ENDIAN) {
+ x = Long.reverseBytes(x);
+ }
+ _put(a + 7, long7(x));
+ _put(a + 6, long6(x));
+ _put(a + 5, long5(x));
+ _put(a + 4, long4(x));
+ _put(a + 3, long3(x));
+ _put(a + 2, long2(x));
+ _put(a + 1, long1(x));
+ _put(a , long0(x));
+ }
+
+ private void putIntX(long a, int x) {
+ if (byteOrder == BIG_ENDIAN) {
+ x = Integer.reverseBytes(x);
+ }
+ _put(a + 3, int3(x));
+ _put(a + 2, int2(x));
+ _put(a + 1, int1(x));
+ _put(a , int0(x));
+ }
+
+ private void putShortX(int bi, short x) {
+ if (byteOrder == BIG_ENDIAN) {
+ x = Short.reverseBytes(x);
+ }
+ _put(bi , short0(x));
+ _put(bi + 1, short1(x));
+ }
+
+ static private int makeInt(byte b3, byte b2, byte b1, byte b0) {
+ return (((b3 ) << 24) |
+ ((b2 & 0xff) << 16) |
+ ((b1 & 0xff) << 8) |
+ ((b0 & 0xff) ));
+ }
+ int getIntX(long a) {
+ int x = makeInt(_get(a + 3),
+ _get(a + 2),
+ _get(a + 1),
+ _get(a));
+ if (byteOrder == BIG_ENDIAN) {
+ x = Integer.reverseBytes(x);
+ }
+ return x;
+ }
+
+ static private long makeLong(byte b7, byte b6, byte b5, byte b4,
+ byte b3, byte b2, byte b1, byte b0)
+ {
+ return ((((long)b7 ) << 56) |
+ (((long)b6 & 0xff) << 48) |
+ (((long)b5 & 0xff) << 40) |
+ (((long)b4 & 0xff) << 32) |
+ (((long)b3 & 0xff) << 24) |
+ (((long)b2 & 0xff) << 16) |
+ (((long)b1 & 0xff) << 8) |
+ (((long)b0 & 0xff) ));
+ }
+
+ long getLongX(long a) {
+ long x = makeLong(_get(a + 7),
+ _get(a + 6),
+ _get(a + 5),
+ _get(a + 4),
+ _get(a + 3),
+ _get(a + 2),
+ _get(a + 1),
+ _get(a));
+ if (byteOrder == BIG_ENDIAN) {
+ x = Long.reverseBytes(x);
+ }
+ return x;
+ }
+
+ static private short makeShort(byte b1, byte b0) {
+ return (short)((b1 << 8) | (b0 & 0xff));
+ }
+
+ short getShortX(long a) {
+ short x = makeShort(_get(a + 1),
+ _get(a ));
+ if (byteOrder == BIG_ENDIAN) {
+ x = Short.reverseBytes(x);
+ }
+ return x;
+ }
+
+ double getDoubleX(long a) {
+ long x = getLongX(a);
+ return Double.longBitsToDouble(x);
+ }
+
+ double getFloatX(long a) {
+ int x = getIntX(a);
+ return Float.intBitsToFloat(x);
+ }
+
+ void ck(long x, long y) {
+ if (x != y) {
+ throw new RuntimeException(" x = " + Long.toHexString(x) + ", y = " + Long.toHexString(y));
+ }
+ }
+
+ void ck(double x, double y) {
+ if (x == x && y == y && x != y) {
+ ck(x, y);
+ }
+ }
+
+ long getLong(int i) { ck(buf.getLong(i), getLongX(i)); return buf.getLong(i); }
+ int getInt(int i) { ck(buf.getInt(i), getIntX(i)); return buf.getInt(i); }
+ short getShort(int i) { ck(buf.getShort(i), getShortX(i)); return buf.getShort(i); }
+ char getChar(int i) { ck(buf.getChar(i), (char)getShortX(i)); return buf.getChar(i); }
+ double getDouble(int i) { ck(buf.getDouble(i), getDoubleX(i)); return buf.getDouble(i); }
+ float getFloat(int i) { ck(buf.getFloat(i), getFloatX(i)); return buf.getFloat(i); }
+
+ void putLong(int i, long x) { buf.putLong(i, x); putLongX(i, x); }
+ void putInt(int i, int x) { buf.putInt(i, x); putIntX(i, x); }
+ void putShort(int i, short x) { buf.putShort(i, x); putShortX(i, x); }
+ void putChar(int i, char x) { buf.putChar(i, x); putShortX(i, (short)x); }
+ void putDouble(int i, double x) { buf.putDouble(i, x); putLongX(i, Double.doubleToRawLongBits(x)); }
+ void putFloat(int i, float x) { buf.putFloat(i, x); putIntX(i, Float.floatToRawIntBits(x)); }
+
+ long getLong() { ck(buf.getLong(buf.position()), getLongX(pos)); long x = buf.getLong(); pos += 8; return x; }
+ int getInt() { ck(buf.getInt(buf.position()), getIntX(pos)); int x = buf.getInt(); pos += 4; return x; }
+ short getShort() { ck(buf.getShort(buf.position()), getShortX(pos)); short x = buf.getShort(); pos += 2; return x; }
+ char getChar() { ck(buf.getChar(buf.position()), (char)getShortX(pos)); char x = buf.getChar(); pos += 2; return x; }
+ double getDouble() { ck(buf.getDouble(buf.position()), getDoubleX(pos)); double x = buf.getDouble(); pos += 8; return x; }
+ float getFloat() { ck(buf.getFloat(buf.position()), getFloatX(pos)); float x = buf.getFloat(); pos += 4; return x; }
+
+ void putLong(long x) { putLongX(pos, x); pos += 8; buf.putLong(x); }
+ void putInt(int x) { putIntX(pos, x); pos += 4; buf.putInt(x); }
+ void putShort(short x) { putShortX(pos, x); pos += 2; buf.putShort(x); }
+ void putChar(char x) { putShortX(pos, (short)x); pos += 2; buf.putChar(x); }
+ void putDouble(double x) { putLongX(pos, Double.doubleToRawLongBits(x)); pos += 8; buf.putDouble(x); }
+ void putFloat(float x) { putIntX(pos, Float.floatToRawIntBits(x)); pos += 4; buf.putFloat(x); }
+
+ void rewind() { pos = 0; buf.rewind(); }
+}
+
+public class HeapByteBufferTest implements Runnable {
+
+ SplittableRandom random = new SplittableRandom();
+ MyByteBuffer data = MyByteBuffer.wrap(new byte[1024]);
+
+ int randomOffset(SplittableRandom r, MyByteBuffer buf, int size) {
+ return abs(r.nextInt()) % (buf.capacity() - size);
+ }
+
+ long iterations;
+
+ HeapByteBufferTest(long iterations) {
+ this.iterations = iterations;
+ }
+
+ // The core of the test. Walk over the buffer reading and writing
+ // random data, XORing it as we go. We can detect writes in the
+ // wrong place, writes which are too long or too short, and reads
+ // or writes of the wrong data.
+ void step(SplittableRandom r) {
+ data.order((r.nextInt() & 1) != 0 ? BIG_ENDIAN : LITTLE_ENDIAN);
+
+ data.rewind();
+ while (data.position() < data.capacity())
+ data.putLong(data.getLong() ^ random.nextLong());
+
+ data.rewind();
+ while (data.position() < data.capacity())
+ data.putInt(data.getInt() ^ random.nextInt());
+
+ data.rewind();
+ while (data.position() < data.capacity())
+ data.putShort((short)(data.getShort() ^ random.nextInt()));
+
+ data.rewind();
+ while (data.position() < data.capacity())
+ data.putChar((char)(data.getChar() ^ random.nextInt()));
+
+ data.rewind();
+ while (data.position() < data.capacity()) {
+ data.putDouble(combine(data.getDouble(), random.nextLong()));
+ }
+
+ data.rewind();
+ while (data.position() < data.capacity())
+ data.putFloat(combine(data.getFloat(), random.nextInt()));
+
+ for (int i = 0; i < 100; i++) {
+ int offset = randomOffset(r, data, 8);
+ data.putLong(offset, data.getLong(offset) ^ random.nextLong());
+ }
+ for (int i = 0; i < 100; i++) {
+ int offset = randomOffset(r, data, 4);
+ data.putInt(offset, data.getInt(offset) ^ random.nextInt());
+ }
+ for (int i = 0; i < 100; i++) {
+ int offset = randomOffset(r, data, 4);
+ data.putShort(offset, (short)(data.getShort(offset) ^ random.nextInt()));
+ }
+ for (int i = 0; i < 100; i++) {
+ int offset = randomOffset(r, data, 4);
+ data.putChar(offset, (char)(data.getChar(offset) ^ random.nextInt()));
+ }
+ for (int i = 0; i < 100; i++) {
+ int offset = randomOffset(r, data, 8);
+ data.putDouble(offset, combine(data.getDouble(offset), random.nextLong()));
+ }
+ for (int i = 0; i < 100; i++) {
+ int offset = randomOffset(r, data, 4);
+ data.putFloat(offset, combine(data.getFloat(offset), random.nextInt()));
+ }
+ }
+
+ // XOR the bit pattern of a double and a long, returning the
+ // result as a double.
+ //
+ // We convert signalling NaNs to quiet NaNs. We need to do this
+ // because some platforms (in particular legacy 80x87) do not
+ // provide transparent conversions between integer and
+ // floating-point types even when using raw conversions but
+ // quietly convert sNaN to qNaN. This causes spurious test
+ // failures when the template interpreter uses 80x87 and the JITs
+ // use XMM registers.
+ //
+ public double combine(double prev, long bits) {
+ bits ^= Double.doubleToRawLongBits(prev);
+ double result = Double.longBitsToDouble(bits);
+ if (Double.isNaN(result)) {
+ result = Double.longBitsToDouble(bits | 0x8000000000000l);
+ }
+ return result;
+ }
+
+ // XOR the bit pattern of a float and an int, returning the result
+ // as a float. Convert sNaNs to qNaNs.
+ public Float combine(float prev, int bits) {
+ bits ^= Float.floatToRawIntBits(prev);
+ Float result = Float.intBitsToFloat(bits);
+ if (Float.isNaN(result)) {
+ result = Float.intBitsToFloat(bits | 0x400000);
+ }
+ return result;
+ }
+
+ public void run() {
+ SplittableRandom r = new SplittableRandom();
+
+ for (int i = 0; i < data.capacity(); i += 8) {
+ data.putLong(i, random.nextLong());
+ }
+
+ for (int i = 0; i < iterations; i++) {
+ step(r);
+ }
+
+ if (!Arrays.equals(data.array(), data.backingArray())) {
+ throw new RuntimeException();
+ }
+ }
+
+ public static void main(String[] args) {
+ // The number of iterations is high to ensure that tiered
+ // compilation kicks in all the way up to C2.
+ long iterations = 100000;
+ if (args.length > 0)
+ iterations = Long.parseLong(args[0]);
+
+ new HeapByteBufferTest(iterations).run();
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/ProdRed_Double.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8074981
+ * @summary Add C2 x86 Superword support for scalar product reduction optimizations : double test
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 ProdRed_Double
+ */
+
+public class ProdRed_Double
+{
+ public static void main(String[] args) throws Exception {
+ double[] a = new double[256*1024];
+ double[] b = new double[256*1024];
+ prodReductionInit(a,b);
+ double valid = 2000;
+ double total = 0;
+ for(int j = 0; j < 2000; j++) {
+ total = j + 1;
+ total = prodReductionImplement(a,b, total);
+ }
+ if(total == valid) {
+ System.out.println("Success");
+ } else {
+ System.out.println("Invalid sum of elements variable in total: " + total);
+ System.out.println("Expected value = " + valid);
+ throw new Exception("Failed");
+ }
+ }
+
+ public static void prodReductionInit(double[] a, double[] b)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ a[i] = i + 2;
+ b[i] = i + 1;
+ }
+ }
+
+ public static double prodReductionImplement(double[] a, double[] b, double total)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ total *= a[i] - b[i];
+ }
+ return total;
+ }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/ProdRed_Float.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8074981
+ * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 ProdRed_Float
+ */
+
+public class ProdRed_Float
+{
+ public static void main(String[] args) throws Exception {
+ float[] a = new float[256*1024];
+ float[] b = new float[256*1024];
+ prodReductionInit(a,b);
+ float valid = 2000;
+ float total = 0;
+ for(int j = 0; j < 2000; j++) {
+ total = j + 1;
+ total = prodReductionImplement(a,b, total);
+ }
+ if(total == valid) {
+ System.out.println("Success");
+ } else {
+ System.out.println("Invalid sum of elements variable in total: " + total);
+ System.out.println("Expected value = " + valid);
+ throw new Exception("Failed");
+ }
+ }
+
+ public static void prodReductionInit(float[] a, float[] b)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ a[i] = i + 2;
+ b[i] = i + 1;
+ }
+ }
+
+ public static float prodReductionImplement(float[] a, float[] b, float total)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ total *= a[i] - b[i];
+ }
+ return total;
+ }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/ProdRed_Int.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8074981
+ * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 ProdRed_Int
+ */
+
+public class ProdRed_Int
+{
+ public static void main(String[] args) throws Exception {
+ int[] a = new int[256*1024];
+ int[] b = new int[256*1024];
+ prodReductionInit(a,b);
+ int valid = 419430401;
+ int total = 1;
+ for(int j = 0; j < 2000; j++) {
+ total = prodReductionImplement(a,b,total);
+ }
+ if(total == valid) {
+ System.out.println("Success");
+ } else {
+ System.out.println("Invalid sum of elements variable in total: " + total);
+ System.out.println("Expected value = " + valid);
+ throw new Exception("Failed");
+ }
+ }
+
+ public static void prodReductionInit(int[] a, int[] b)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ a[i] = i + 2;
+ b[i] = i + 1;
+ }
+ }
+
+ public static int prodReductionImplement(int[] a, int[] b, int total)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ total *= a[i] + b[i];
+ }
+ return total;
+ }
+
+}
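One point worth noting (illustrative aside, not part of the changeset): unlike the float test, total here is carried across all 2000 iterations, so the running product overflows int many times over. Java int multiplication wraps modulo 2^32, so the overflowing result is still fully deterministic and can be compared against the fixed valid constant. A minimal demonstration of the wraparound:

    int x = Integer.MAX_VALUE;
    x *= 2;                  // 2 * (2^31 - 1) mod 2^32, i.e. -2 as a signed int
    // x == -2 here; the wraparound is well defined, never an error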
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/SumRed_Double.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8074981
+ * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRed_Double
+ */
+
+public class SumRed_Double
+{
+ public static void main(String[] args) throws Exception {
+ double[] a = new double[256*1024];
+ double[] b = new double[256*1024];
+ double[] c = new double[256*1024];
+ double[] d = new double[256*1024];
+ sumReductionInit(a,b,c);
+ double total = 0;
+ double valid = 3.6028590866691944E19;
+ for(int j = 0; j < 2000; j++) {
+ total = sumReductionImplement(a,b,c,d,total);
+ }
+ if(total == valid) {
+ System.out.println("Success");
+ } else {
+ System.out.println("Invalid sum of elements variable in total: " + total);
+ System.out.println("Expected value = " + valid);
+ throw new Exception("Failed");
+ }
+ }
+
+ public static void sumReductionInit(
+ double[] a,
+ double[] b,
+ double[] c)
+ {
+ for(int j = 0; j < 1; j++)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ a[i] = i * 1 + j;
+ b[i] = i * 1 - j;
+ c[i] = i + j;
+ }
+ }
+ }
+
+ public static double sumReductionImplement(
+ double[] a,
+ double[] b,
+ double[] c,
+ double[] d,
+ double total)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ d[i]= (a[i] * b[i]) + (a[i] * c[i]) + (b[i] * c[i]);
+ total += d[i];
+ }
+ return total;
+ }
+
+}
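For readers checking the valid constant (illustrative aside, not part of the changeset): with j fixed at 0, a[i] = b[i] = c[i] = i, so each element contributes d[i] = 3*i*i, one pass adds roughly the closed-form sum (n-1)*n*(2n-1)/2 with n = 256*1024, and total accumulates that over 2000 passes. The exact constant 3.6028590866691944E19 is what the actual double accumulation order produces; the closed-form estimate agrees with it to about nine significant digits:

    long n = 256 * 1024;
    double onePass = (double) (n - 1) * n * (2 * n - 1) / 2;  // ~1.8014e16 per pass
    double approx  = 2000 * onePass;                          // ~3.6029e19 overall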
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/SumRed_Float.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8074981
+ * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRed_Float
+ */
+
+public class SumRed_Float
+{
+ public static void main(String[] args) throws Exception {
+ float[] a = new float[256*1024];
+ float[] b = new float[256*1024];
+ float[] c = new float[256*1024];
+ float[] d = new float[256*1024];
+ sumReductionInit(a,b,c);
+ float total = 0;
+ float valid = (float)4.611686E18;
+ for(int j = 0; j < 2000; j++) {
+ total = sumReductionImplement(a,b,c,d,total);
+ }
+ if(total == valid) {
+ System.out.println("Success");
+ } else {
+ System.out.println("Invalid sum of elements variable in total: " + total);
+ System.out.println("Expected value = " + valid);
+ throw new Exception("Failed");
+ }
+ }
+
+ public static void sumReductionInit(
+ float[] a,
+ float[] b,
+ float[] c)
+ {
+ for(int j = 0; j < 1; j++)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ a[i] = i * 1 + j;
+ b[i] = i * 1 - j;
+ c[i] = i + j;
+ }
+ }
+ }
+
+ public static float sumReductionImplement(
+ float[] a,
+ float[] b,
+ float[] c,
+ float[] d,
+ float total)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ d[i]= (a[i] * b[i]) + (a[i] * c[i]) + (b[i] * c[i]);
+ total += d[i];
+ }
+ return total;
+ }
+
+}
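The float variant behaves differently (illustrative aside, not part of the changeset): the running total is carried across all 2000 passes, and once it grows large enough each per-element contribution (at most about 2e11) falls below half an ulp of the total and is absorbed, so the accumulation saturates near 2^62 ~= 4.61e18 instead of reaching the 2000x analytic estimate. The valid constant is simply that deterministic saturated float value. Absorption in miniature:

    float t = 1 << 24;   // 16777216.0f; ulp(t) == 2.0f
    t += 1.0f;           // only half an ulp: round-to-even drops it
    // t is still 16777216.0f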
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/SumRed_Int.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8074981
+ * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRed_Int
+ */
+
+public class SumRed_Int
+{
+ public static void main(String[] args) throws Exception {
+ int[] a = new int[256*1024];
+ int[] b = new int[256*1024];
+ int[] c = new int[256*1024];
+ int[] d = new int[256*1024];
+ sumReductionInit(a,b,c);
+ int total = 0;
+ int valid = 262144000;
+ for(int j = 0; j < 2000; j++) {
+ total = sumReductionImplement(a,b,c,d,total);
+ }
+ if(total == valid) {
+ System.out.println("Success");
+ } else {
+ System.out.println("Invalid sum of elements variable in total: " + total);
+ System.out.println("Expected value = " + valid);
+ throw new Exception("Failed");
+ }
+ }
+
+ public static void sumReductionInit(
+ int[] a,
+ int[] b,
+ int[] c)
+ {
+ for(int j = 0; j < 1; j++)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ a[i] = i * 1 + j;
+ b[i] = i * 1 - j;
+ c[i] = i + j;
+ }
+ }
+ }
+
+ public static int sumReductionImplement(
+ int[] a,
+ int[] b,
+ int[] c,
+ int[] d,
+ int total)
+ {
+ for(int i = 0; i < a.length; i++)
+ {
+ d[i]= (a[i] * b[i]) + (a[i] * c[i]) + (b[i] * c[i]);
+ total += d[i];
+ }
+ return total;
+ }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8073480
+ * @summary explicit range checks should be recognized by C2
+ * @library /testlibrary /../../test/lib /compiler/whitebox
+ * @build TestExplicitRangeChecks
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main ClassFileInstaller com.oracle.java.testlibrary.Platform
+ * @run main/othervm -ea -Xmixed -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:CompileCommand=compileonly,TestExplicitRangeChecks.test* TestExplicitRangeChecks
+ *
+ */
+
+import java.lang.annotation.*;
+import java.lang.reflect.*;
+import java.util.*;
+import sun.hotspot.WhiteBox;
+import sun.hotspot.code.NMethod;
+import com.oracle.java.testlibrary.Platform;
+import sun.misc.Unsafe;
+
+public class TestExplicitRangeChecks {
+
+ static int[] array = new int[10];
+
+ @Retention(RetentionPolicy.RUNTIME)
+ @interface Args {
+ int[] compile();
+ int[] good();
+ int[] bad();
+ boolean deoptimize() default true;
+ }
+
+ // Should be compiled as a single unsigned comparison
+ // 0 <= index < array.length
+ @Args(compile = {5,}, good = {0, 9}, bad = {-1, 10})
+ static boolean test1_1(int index, int[] array) {
+ if (index < 0 || index >= array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ // Same test, so that after a trap in test1_1 this copy can be compiled with the same optimization
+ static boolean test1_2(int index, int[] array) {
+ if (index < 0 || index >= array.length) {
+ return false;
+ }
+ return true;
+ }
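    // Illustrative aside (not part of the changeset; the method name below is
    // hypothetical): the pair of signed checks in test1_1/test1_2 is equivalent
    // to one unsigned comparison, which is the single-compare shape the
    // optimization is expected to reduce it to.
    static boolean test1_unsignedForm(int index, int[] array) {
        return Integer.compareUnsigned(index, array.length) < 0;
    }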
+
+ // Shouldn't matter whether the first or the second test is the one
+ // against a constant
+ // 0 <= index < array.length
+ @Args(compile = {5,}, good = {0, 9}, bad = {-1, 10})
+ static boolean test2_1(int index, int[] array) {
+ if (index >= array.length || index < 0) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test2_2(int index, int[] array) {
+ if (index >= array.length || index < 0) {
+ return false;
+ }
+ return true;
+ }
+
+ // 0 <= index <= array.length
+ @Args(compile = {5,}, good = {0, 10}, bad = {-1, 11})
+ static boolean test3_1(int index, int[] array) {
+ if (index < 0 || index > array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test3_2(int index, int[] array) {
+ if (index < 0 || index > array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ // 0 <= index <= array.length
+ @Args(compile = {5,}, good = {0, 10}, bad = {-1, 11})
+ static boolean test4_1(int index, int[] array) {
+ if (index > array.length || index < 0 ) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test4_2(int index, int[] array) {
+ if (index > array.length || index < 0) {
+ return false;
+ }
+ return true;
+ }
+
+ static int[] test5_helper(int i) {
+ return (i < 100) ? new int[10] : new int[5];
+ }
+
+ // 0 < index < array.length
+ @Args(compile = {5,}, good = {1, 9}, bad = {0, 10})
+ static boolean test5_1(int index, int[] array) {
+ array = test5_helper(index); // array.length must be a non-constant value greater than 1
+ if (index <= 0 || index >= array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test5_2(int index, int[] array) {
+ array = test5_helper(index); // array.length must be a non-constant value greater than 1
+ if (index <= 0 || index >= array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ // 0 < index < array.length
+ @Args(compile = {5,}, good = {1, 9}, bad = {0, 10})
+ static boolean test6_1(int index, int[] array) {
+ array = test5_helper(index); // array.length must be a non-constant value greater than 1
+ if (index >= array.length || index <= 0 ) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test6_2(int index, int[] array) {
+ array = test5_helper(index); // array.length must be a non-constant value greater than 1
+ if (index >= array.length || index <= 0) {
+ return false;
+ }
+ return true;
+ }
+
+ // 0 < index <= array.length
+ @Args(compile = {5,}, good = {1, 10}, bad = {0, 11})
+ static boolean test7_1(int index, int[] array) {
+ if (index <= 0 || index > array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test7_2(int index, int[] array) {
+ if (index <= 0 || index > array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ // 0 < index <= array.length
+ @Args(compile = {5,}, good = {1, 10}, bad = {0, 11})
+ static boolean test8_1(int index, int[] array) {
+ if (index > array.length || index <= 0 ) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test8_2(int index, int[] array) {
+ if (index > array.length || index <= 0) {
+ return false;
+ }
+ return true;
+ }
+
+ static int[] test9_helper1(int i) {
+ return (i < 100) ? new int[1] : new int[2];
+ }
+
+ static int[] test9_helper2(int i) {
+ return (i < 100) ? new int[10] : new int[11];
+ }
+
+ // array1.length <= index < array2.length
+ @Args(compile = {5,}, good = {1, 9}, bad = {0, 10})
+ static boolean test9_1(int index, int[] array) {
+ int[] array1 = test9_helper1(index);
+ int[] array2 = test9_helper2(index);
+ if (index < array1.length || index >= array2.length) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test9_2(int index, int[] array) {
+ int[] array1 = test9_helper1(index);
+ int[] array2 = test9_helper2(index);
+ if (index < array1.length || index >= array2.length) {
+ return false;
+ }
+ return true;
+ }
+
+ // Previously supported pattern
+ @Args(compile = {-5,5,15}, good = {0, 9}, bad = {-1, 10}, deoptimize=false)
+ static boolean test10_1(int index, int[] array) {
+ if (index < 0 || index >= 10) {
+ return false;
+ }
+ return true;
+ }
+
+ static int[] array11 = new int[10];
+ @Args(compile = {5,}, good = {0, 9}, bad = {-1,})
+ static boolean test11_1(int index, int[] array) {
+ if (index < 0) {
+ return false;
+ }
+ int unused = array11[index];
+ // If this test were folded with the first one, the array access
+ // above would be allowed to proceed even for an out-of-bounds
+ // index, and the method would throw an
+ // ArrayIndexOutOfBoundsException.
+ if (index >= array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ static int[] array12 = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10};
+ @Args(compile = {5,}, good = {0, 9}, bad = {-1,})
+ static boolean test12_1(int index, int[] array) {
+ // Cannot be folded: doing so would cause an incorrect array
+ // access if the array12 range check were executed before the
+ // folded test.
+ if (index < 0 || index >= array12[index]) {
+ return false;
+ }
+ return true;
+ }
+
+ // Same as test1_1 but passes a null array when index < 0: shouldn't
+ // cause an NPE.
+ @Args(compile = {5,}, good = {0, 9}, bad = {})
+ static boolean test13_1(int index, int[] array) {
+ if (index < 0 || index >= array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ // Same as test10 but with uncommon traps
+ @Args(compile = {5}, good = {0, 9}, bad = {-1, 10})
+ static boolean test14_1(int index, int[] array) {
+ if (index < 0 || index >= 10) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test14_2(int index, int[] array) {
+ if (index < 0 || index >= 10) {
+ return false;
+ }
+ return true;
+ }
+
+ // Same as test13_1 but passes a null array: the null trap should be reported on the first if
+ @Args(compile = {5,}, good = {0, 9}, bad = {})
+ static boolean test15_1(int index, int[] array) {
+ if (index < 0 || index >= array.length) {
+ return false;
+ }
+ return true;
+ }
+
+ // Same as test1 but with no null check between the integer comparisons
+ @Args(compile = {5,}, good = {0, 9}, bad = {-1, 10})
+ static boolean test16_1(int index, int[] array) {
+ int l = array.length;
+ if (index < 0 || index >= l) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test16_2(int index, int[] array) {
+ int l = array.length;
+ if (index < 0 || index >= l) {
+ return false;
+ }
+ return true;
+ }
+
+ // Same as test1 but the bounds check on the array access should be
+ // optimized out.
+ @Args(compile = {5,}, good = {0, 9}, bad = {-1, 10})
+ static boolean test17_1(int index, int[] array) {
+ if (index < 0 || index >= array.length) {
+ return false;
+ }
+ array[index] = 0;
+ return true;
+ }
+
+ static boolean test17_2(int index, int[] array) {
+ if (index < 0 || index >= array.length) {
+ return false;
+ }
+ array[index] = 0;
+ return true;
+ }
+
+ // Same as test1 but range check smearing should optimize the
+ // 3rd range check out.
+ @Args(compile = {5,}, good = {}, bad = {})
+ static boolean test18_1(int index, int[] array) {
+ if (index < 0 || index >= array.length) {
+ return false;
+ }
+ array[index+2] = 0;
+ array[index+1] = 0;
+ return true;
+ }
+
+ static boolean test19_helper1(int index) {
+ if (index < 12) {
+ return false;
+ }
+ return true;
+ }
+
+ static boolean test19_helper2(int index) {
+ if (index > 8) {
+ return false;
+ }
+ return true;
+ }
+
+ // Second test should be optimized out
+ static boolean test19(int index, int[] array) {
+ test19_helper1(index);
+ test19_helper2(index);
+ return true;
+ }
+
+ static boolean success = true;
+
+ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+
+ final HashMap<String,Method> tests = new HashMap<>();
+ {
+ for (Method m : this.getClass().getDeclaredMethods()) {
+ if (m.getName().matches("test[0-9]+(_[0-9])?")) {
+ assert(Modifier.isStatic(m.getModifiers())) : m;
+ tests.put(m.getName(), m);
+ }
+ }
+ }
+
+ void doTest(String name) throws Exception {
+ Method m = tests.get(name + "_1");
+
+ Args anno = m.getAnnotation(Args.class);
+ int[] compile = anno.compile();
+ int[] good = anno.good();
+ int[] bad = anno.bad();
+ boolean deoptimize = anno.deoptimize();
+
+ // Get compiled
+ for (int i = 0; i < 20000;) {
+ for (int j = 0; j < compile.length; j++) {
+ m.invoke(null, compile[j], array);
+ i++;
+ }
+ }
+
+ if (!WHITE_BOX.isMethodCompiled(m)) {
+ System.out.println(name + "_1 not compiled");
+ success = false;
+ }
+
+ // check that good values don't trigger exception or
+ // deoptimization
+ for (int i = 0; i < good.length; i++) {
+ boolean res = (boolean)m.invoke(null, good[i], array);
+
+ if (!res) {
+ System.out.println(name + " bad result for good input " + good[i]);
+ success = false;
+ }
+ if (!WHITE_BOX.isMethodCompiled(m)) {
+ System.out.println(name + " deoptimized on valid access");
+ success = false;
+ }
+ }
+
+ // check that bad values trigger exception and deoptimization
+ for (int i = 0; i < bad.length; i++) {
+ if (i > 0 && deoptimize) {
+ m = tests.get(name + "_" + (i+1));
+ for (int k = 0; k < 20000;) {
+ for (int j = 0; j < compile.length; j++) {
+ m.invoke(null, compile[j], array);
+ k++;
+ }
+ }
+ if (!WHITE_BOX.isMethodCompiled(m)) {
+ System.out.println(name + ("_" + (i+1)) + " not compiled");
+ success = false;
+ }
+ }
+
+ boolean res = (boolean)m.invoke(null, bad[i], array);
+
+ if (res) {
+ System.out.println(name + " bad result for bad input " + bad[i]);
+ success = false;
+ }
+ if (Platform.isServer()) {
+ if (deoptimize && WHITE_BOX.isMethodCompiled(m)) {
+ System.out.println(name + " not deoptimized on invalid access");
+ success = false;
+ } else if (!deoptimize && !WHITE_BOX.isMethodCompiled(m)) {
+ System.out.println(name + " deoptimized on invalid access");
+ success = false;
+ }
+ }
+ }
+
+ }
+
+ private static final Unsafe UNSAFE;
+
+ static {
+ try {
+ Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+ unsafeField.setAccessible(true);
+ UNSAFE = (Unsafe) unsafeField.get(null);
+ }
+ catch (Exception e) {
+ throw new AssertionError(e);
+ }
+ }
+
+ // On x64, int to long conversion should optimize away in address computation
+ static int test20(int[] a) {
+ int sum = 0;
+ for (int i = 0; i < a.length; i++) {
+ sum += test20_helper(a, i);
+ }
+ return sum;
+ }
+
+ static int test20_helper(int[] a, int i) {
+ if (i < 0 || i >= a.length)
+ throw new ArrayIndexOutOfBoundsException();
+
+ long address = (((long) i) << 2) + UNSAFE.ARRAY_INT_BASE_OFFSET;
+ return UNSAFE.getInt(a, address);
+ }
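    // Illustrative aside (not part of the changeset): the manual offset above is
    // the byte address of a[i] relative to the array object. A scale-generic way
    // to write the same computation would be
    //     long address = Unsafe.ARRAY_INT_BASE_OFFSET
    //                  + (long) i * Unsafe.ARRAY_INT_INDEX_SCALE;
    // and the point of test20 (and its volatile twin test21) is that the explicit
    // int-to-long widening of i should fold into the x64 addressing mode rather
    // than survive as a separate conversion.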
+
+ static int test21(int[] a) {
+ int sum = 0;
+ for (int i = 0; i < a.length; i++) {
+ sum += test21_helper(a, i);
+ }
+ return sum;
+ }
+
+ static int test21_helper(int[] a, int i) {
+ if (i < 0 || i >= a.length)
+ throw new ArrayIndexOutOfBoundsException();
+
+ long address = (((long) i) << 2) + UNSAFE.ARRAY_INT_BASE_OFFSET;
+ return UNSAFE.getIntVolatile(a, address);
+ }
+
+ static public void main(String[] args) throws Exception {
+
+ if (WHITE_BOX.getBooleanVMFlag("BackgroundCompilation")) {
+ throw new AssertionError("Background compilation enabled");
+ }
+
+ TestExplicitRangeChecks test = new TestExplicitRangeChecks();
+
+ test.doTest("test1");
+ test.doTest("test2");
+ test.doTest("test3");
+ test.doTest("test4");
+
+ // pollute branch profile
+ for (int i = 0; i < 10000; i++) {
+ test5_helper((i%2 == 0) ? 0 : 1000);
+ }
+
+ test.doTest("test5");
+ test.doTest("test6");
+ test.doTest("test7");
+ test.doTest("test8");
+
+ // pollute branch profile
+ for (int i = 0; i < 10000; i++) {
+ test9_helper1((i%2 == 0) ? 0 : 1000);
+ test9_helper2((i%2 == 0) ? 0 : 1000);
+ }
+
+ test.doTest("test9");
+ test.doTest("test10");
+ test.doTest("test11");
+ test.doTest("test12");
+
+ test.doTest("test13");
+ {
+ Method m = test.tests.get("test13_1");
+ for (int i = 0; i < 1; i++) {
+ test13_1(-1, null);
+ if (!WHITE_BOX.isMethodCompiled(m)) {
+ break;
+ }
+ }
+ }
+ test.doTest("test13");
+ {
+ Method m = test.tests.get("test13_1");
+ for (int i = 0; i < 10; i++) {
+ test13_1(-1, null);
+ if (!WHITE_BOX.isMethodCompiled(m)) {
+ break;
+ }
+ }
+ }
+
+ test.doTest("test14");
+
+ test.doTest("test15");
+ {
+ Method m = test.tests.get("test15_1");
+ for (int i = 0; i < 10; i++) {
+ try {
+ test15_1(5, null);
+ } catch(NullPointerException npe) {}
+ if (!WHITE_BOX.isMethodCompiled(m)) {
+ break;
+ }
+ }
+ }
+ test.doTest("test15");
+ test.doTest("test16");
+ test.doTest("test17");
+ test.doTest("test18");
+
+ for (int i = 0; i < 20000; i++) {
+ test19_helper1(20);
+ test19_helper2(5);
+ }
+
+ {
+ Method m = test.tests.get("test19");
+ WHITE_BOX.enqueueMethodForCompilation(m, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+ }
+
+ for (int i = 0; i < 20000; i++) {
+ test20(array);
+ }
+
+ for (int i = 0; i < 20000; i++) {
+ test21(array);
+ }
+
+ if (!success) {
+ throw new RuntimeException("some tests failed");
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/types/TestMeetExactConstantArrays.java Fri Apr 03 11:41:01 2015 -0700
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8075587
+ * @summary meet of 2 constant arrays results in bottom
+ * @run main/othervm TestMeetExactConstantArrays
+ *
+ */
+
+public class TestMeetExactConstantArrays {
+ public abstract static class NumbersHolder {
+ public Number[] getNumbers() {
+ return null;
+ }
+ }
+
+ public static class IntegersHolder extends NumbersHolder {
+ private final static Integer integers[] = { new Integer(1) };
+
+ public Number[] getNumbers() {
+ return integers;
+ }
+ }
+
+ public static class LongsHolder extends NumbersHolder {
+ private final static Long longs[] = { new Long(1) };
+
+ public Number[] getNumbers() {
+ return longs;
+ }
+ }
+
+ public static final void loopNumbers(NumbersHolder numbersHolder) {
+ Number[] numbers = numbersHolder.getNumbers();
+ for (int i = 0; i < numbers.length; i++) {
+ numbers[i].longValue();
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ for (int i = 0; i < 10000; i++) {
+ IntegersHolder integersHolder = new IntegersHolder();
+ LongsHolder longsHolder = new LongsHolder();
+ loopNumbers(integersHolder);
+ loopNumbers(longsHolder);
+ }
+ }
+}
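Background for the summary line (illustrative aside, not part of the changeset): both holders return arrays that the compiler can see as exact constants, an Integer[] from IntegersHolder and a Long[] from LongsHolder. When loopNumbers is reached with both receivers, C2 has to meet the two constant array types, and that meet should generalize to their common supertype, roughly Number[], rather than fall all the way to bottom, which is what the bug produced. In plain Java terms the common supertype looks like:

    Number[] fromIntegers = new Integer[] { 1 };   // Integer[] is a subtype of Number[]
    Number[] fromLongs    = new Long[] { 1L };     // Long[] is a subtype of Number[]
    // Number[] covers both element types, so numbers[i].longValue()
    // in loopNumbers remains well typed.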
--- a/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java Thu Apr 02 13:38:47 2015 -0700
+++ b/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java Fri Apr 03 11:41:01 2015 -0700
@@ -32,12 +32,12 @@
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
* -XX:+WhiteBoxAPI -Xmixed
* -XX:CompileCommand=compileonly,DeoptimizeFramesTest$TestCaseImpl::method
- * -XX:+IgnoreUnexpectedVMOptions -XX:-DeoptimizeRandom -XX:-DeoptimizeALot
+ * -XX:+IgnoreUnrecognizedVMOptions -XX:-DeoptimizeRandom -XX:-DeoptimizeALot
* DeoptimizeFramesTest true
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
* -XX:+WhiteBoxAPI -Xmixed
* -XX:CompileCommand=compileonly,DeoptimizeFramesTest$TestCaseImpl::method
- * -XX:+IgnoreUnexpectedVMOptions -XX:-DeoptimizeRandom -XX:-DeoptimizeALot
+ * -XX:+IgnoreUnrecognizedVMOptions -XX:-DeoptimizeRandom -XX:-DeoptimizeALot
* DeoptimizeFramesTest false
* @summary testing of WB::deoptimizeFrames()
*/