8077838: Recent developments for ppc.
Summary: Power 8 recognition and instructions, math.*Exact intrinsics and rtm, C2 optimizations
Reviewed-by: kvn, simonis
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -85,8 +85,7 @@
}
// Low-level andi-one-instruction-macro.
-void Assembler::andi(Register a, Register s, const int ui16) {
- assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
+void Assembler::andi(Register a, Register s, const long ui16) {
if (is_power_of_2_long(((jlong) ui16)+1)) {
// pow2minus1
clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
@@ -97,6 +96,7 @@
// negpow2
clrrdi(a, s, log2_long((jlong)-ui16));
} else {
+ assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
andi_(a, s, ui16);
}
}
@@ -356,7 +356,6 @@
// 16 bit immediate offset.
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
// Avoid accidentally trying to use R0 for indexed addressing.
- assert(d != R0, "R0 not allowed");
assert_different_registers(d, tmp);
short xa, xb, xc, xd; // Four 16-bit chunks of const.
@@ -370,6 +369,58 @@
return 0;
}
+ int retval = 0;
+ if (return_simm16_rest) {
+ retval = xd;
+ x = rem << 16;
+ xd = 0;
+ }
+
+ if (d == R0) { // Can't use addi.
+ if (is_simm(x, 32)) { // opt 2: simm32
+ lis(d, x >> 16);
+ if (xd) ori(d, d, (unsigned short)xd);
+ } else {
+ // 64-bit value: x = xa xb xc xd
+ xa = (x >> 48) & 0xffff;
+ xb = (x >> 32) & 0xffff;
+ xc = (x >> 16) & 0xffff;
+ bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
+ if (tmp == noreg || (xc == 0 && xd == 0)) {
+ if (xa_loaded) {
+ lis(d, xa);
+ if (xb) { ori(d, d, (unsigned short)xb); }
+ } else {
+ li(d, xb);
+ }
+ sldi(d, d, 32);
+ if (xc) { oris(d, d, (unsigned short)xc); }
+ if (xd) { ori( d, d, (unsigned short)xd); }
+ } else {
+ // Exploit instruction level parallelism if we have a tmp register.
+ bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
+ if (xa_loaded) {
+ lis(tmp, xa);
+ }
+ if (xc_loaded) {
+ lis(d, xc);
+ }
+ if (xa_loaded) {
+ if (xb) { ori(tmp, tmp, (unsigned short)xb); }
+ } else {
+ li(tmp, xb);
+ }
+ if (xc_loaded) {
+ if (xd) { ori(d, d, (unsigned short)xd); }
+ } else {
+ li(d, xd);
+ }
+ insrdi(d, tmp, 32, 0);
+ }
+ }
+ return retval;
+ }
+
xc = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
@@ -377,28 +428,27 @@
lis(d, xc);
} else { // High 32 bits needed.
- if (tmp != noreg) { // opt 3: We have a temp reg.
+ if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
// No carry propagation between xc and higher chunks here (use logical instructions).
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
- bool load_xa = (xa != 0) || (xb < 0);
+ bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
bool return_xd = false;
- if (load_xa) { lis(tmp, xa); }
+ if (xa_loaded) { lis(tmp, xa); }
if (xc) { lis(d, xc); }
- if (load_xa) {
+ if (xa_loaded) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
} else {
- li(tmp, xb); // non-negative
+ li(tmp, xb);
}
if (xc) {
- if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
- else if (xd) { addi(d, d, xd); }
+ if (xd) { addi(d, d, xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
- return return_xd ? xd : 0; // non-negative
+ return retval;
}
xb = rem & 0xFFFF; // Next 16-bit chunk.
@@ -417,11 +467,51 @@
if (xc) { addis(d, d, xc); }
}
- // opt 5: Return offset to be inserted into following instruction.
- if (return_simm16_rest) return xd;
+ if (xd) { addi(d, d, xd); }
+ return retval;
+}
+
+// Computes d = s + x. We emit only one addition to s to optimize latency. If return_simm16_rest, a simm16 remainder may be returned instead of being added; the caller has to add it to d afterwards (0 means nothing is left).
+int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
+ assert(s != R0 && s != tmp, "unsupported");
+ long rem = x;
- if (xd) { addi(d, d, xd); }
- return 0;
+ // Case 1: Can use mr or addi.
+ short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
+ rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend of xd.
+ if (rem == 0) {
+ if (xd == 0) {
+ if (d != s) { mr(d, s); }
+ return 0;
+ }
+ if (return_simm16_rest && (d == s)) { // Only defer xd if d already holds s; otherwise d would never receive s.
+ return xd;
+ }
+ addi(d, s, xd);
+ return 0;
+ }
+
+ // Case 2: Can use addis.
+ if (xd == 0) {
+ short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
+ rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend of xc (xd is 0 here and carries nothing).
+ if (rem == 0) {
+ addis(d, s, xc);
+ return 0;
+ }
+ }
+
+ // Other cases: load & add.
+ Register tmp1 = tmp,
+ tmp2 = noreg;
+ if ((d != tmp) && (d != s)) {
+ // Can use d.
+ tmp1 = d;
+ tmp2 = tmp;
+ }
+ int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest); // Constant is materialized independently of s.
+ add(d, tmp1, s); // The single addition that depends on s.
+ return simm16_rest;
}
#ifndef PRODUCT
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -224,10 +224,13 @@
ADDIS_OPCODE = (15u << OPCODE_SHIFT),
ADDIC__OPCODE = (13u << OPCODE_SHIFT),
ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1),
+ ADDME_OPCODE = (31u << OPCODE_SHIFT | 234u << 1),
+ ADDZE_OPCODE = (31u << OPCODE_SHIFT | 202u << 1),
SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1),
SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1),
SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1),
SUBFIC_OPCODE = (8u << OPCODE_SHIFT),
+ SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1),
SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1),
MULLW_OPCODE = (31u << OPCODE_SHIFT | 235u << 1),
@@ -657,6 +660,9 @@
SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1),
EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1),
+ // Wait instructions for polling.
+ WAIT_OPCODE = (31u << OPCODE_SHIFT | 62u << 1),
+
// Trap instructions
TDI_OPCODE = (2u << OPCODE_SHIFT),
TWI_OPCODE = (3u << OPCODE_SHIFT),
@@ -666,8 +672,10 @@
// Atomics.
LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1),
LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1),
+ LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1),
STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1),
- STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1)
+ STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1),
+ STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1)
};
@@ -1171,6 +1179,14 @@
inline void adde_( Register d, Register a, Register b);
inline void subfe( Register d, Register a, Register b);
inline void subfe_( Register d, Register a, Register b);
+ inline void addme( Register d, Register a);
+ inline void addme_( Register d, Register a);
+ inline void subfme( Register d, Register a);
+ inline void subfme_(Register d, Register a);
+ inline void addze( Register d, Register a);
+ inline void addze_( Register d, Register a);
+ inline void subfze( Register d, Register a);
+ inline void subfze_(Register d, Register a);
inline void neg( Register d, Register a);
inline void neg_( Register d, Register a);
inline void mulli( Register d, Register a, int si16);
@@ -1189,6 +1205,38 @@
inline void divw( Register d, Register a, Register b);
inline void divw_( Register d, Register a, Register b);
+ // Fixed-Point Arithmetic Instructions with Overflow detection
+ inline void addo( Register d, Register a, Register b);
+ inline void addo_( Register d, Register a, Register b);
+ inline void subfo( Register d, Register a, Register b);
+ inline void subfo_( Register d, Register a, Register b);
+ inline void addco( Register d, Register a, Register b);
+ inline void addco_( Register d, Register a, Register b);
+ inline void subfco( Register d, Register a, Register b);
+ inline void subfco_( Register d, Register a, Register b);
+ inline void addeo( Register d, Register a, Register b);
+ inline void addeo_( Register d, Register a, Register b);
+ inline void subfeo( Register d, Register a, Register b);
+ inline void subfeo_( Register d, Register a, Register b);
+ inline void addmeo( Register d, Register a);
+ inline void addmeo_( Register d, Register a);
+ inline void subfmeo( Register d, Register a);
+ inline void subfmeo_(Register d, Register a);
+ inline void addzeo( Register d, Register a);
+ inline void addzeo_( Register d, Register a);
+ inline void subfzeo( Register d, Register a);
+ inline void subfzeo_(Register d, Register a);
+ inline void nego( Register d, Register a);
+ inline void nego_( Register d, Register a);
+ inline void mulldo( Register d, Register a, Register b);
+ inline void mulldo_( Register d, Register a, Register b);
+ inline void mullwo( Register d, Register a, Register b);
+ inline void mullwo_( Register d, Register a, Register b);
+ inline void divdo( Register d, Register a, Register b);
+ inline void divdo_( Register d, Register a, Register b);
+ inline void divwo( Register d, Register a, Register b);
+ inline void divwo_( Register d, Register a, Register b);
+
// extended mnemonics
inline void li( Register d, int si16);
inline void lis( Register d, int si16);
@@ -1303,7 +1351,7 @@
inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
- void andi( Register a, Register s, int ui16); // optimized version
+ void andi( Register a, Register s, long ui16); // optimized version
inline void andi_( Register a, Register s, int ui16);
inline void andis_( Register a, Register s, int ui16);
inline void ori( Register a, Register s, int ui16);
@@ -1688,14 +1736,21 @@
inline void isync();
inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
+ // Wait instructions for polling. Attention: May result in SIGILL.
+ inline void wait();
+ inline void waitrsv(); // >=Power7
+
// atomics
inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
+ inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline bool lxarx_hint_exclusive_access();
inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
+ inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void stwcx_( Register s, Register a, Register b);
inline void stdcx_( Register s, Register a, Register b);
+ inline void stqcx_( Register s, Register a, Register b);
// Instructions for adjusting thread priority for simultaneous
// multithreading (SMT) on Power5.
@@ -2054,10 +2109,13 @@
// Atomics: use ra0mem to disallow R0 as base.
inline void lwarx_unchecked(Register d, Register b, int eh1);
inline void ldarx_unchecked(Register d, Register b, int eh1);
+ inline void lqarx_unchecked(Register d, Register b, int eh1);
inline void lwarx( Register d, Register b, bool hint_exclusive_access);
inline void ldarx( Register d, Register b, bool hint_exclusive_access);
+ inline void lqarx( Register d, Register b, bool hint_exclusive_access);
inline void stwcx_(Register s, Register b);
inline void stdcx_(Register s, Register b);
+ inline void stqcx_(Register s, Register b);
inline void lfs( FloatRegister d, int si16);
inline void lfsx( FloatRegister d, Register b);
inline void lfd( FloatRegister d, int si16);
@@ -2120,6 +2178,20 @@
return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
}
+ // Add constant x to s, result in d. If return_simm16_rest, the return value needs to get added afterwards.
+ int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false);
+ inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { // Address variant.
+ return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); // The address bits are passed on as a long constant.
+ }
+
+ // Subtract constant x from s by adding the negated constant, result in d. If return_simm16_rest, the return value needs to get added afterwards.
+ inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) {
+ return add_const_optimized(d, s, (long)(0UL - (unsigned long)x), tmp, return_simm16_rest); // Negate in unsigned arithmetic: plain -x is undefined behavior for the minimum long.
+ }
+ inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { // Address variant.
+ return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); // Forwards to the long overload with the address bits as constant.
+ }
+
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef CHECK_DELAY
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -100,6 +100,14 @@
inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::addme( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
+inline void Assembler::addme_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
+inline void Assembler::subfme( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
+inline void Assembler::subfme_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
+inline void Assembler::addze( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
+inline void Assembler::addze_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
+inline void Assembler::subfze( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
+inline void Assembler::subfze_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
@@ -118,6 +126,38 @@
inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+// Fixed-Point Arithmetic Instructions with Overflow detection
+inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfo( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfo_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addco( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addco_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfco( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfco_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addeo( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addeo_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfeo( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addmeo( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::addmeo_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::subfmeo( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::subfmeo_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::addzeo( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::addzeo_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::subfzeo( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::subfzeo_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::nego( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::nego_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::mulldo( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mulldo_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::mullwo( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divdo( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divdo_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divwo( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+
// extended mnemonics
inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
@@ -540,15 +580,22 @@
inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); }
inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
+// Wait instructions for polling. Both emit a single instruction word built from WAIT_OPCODE.
+inline void Assembler::wait() { emit_int32( WAIT_OPCODE); } // Plain opcode, no further bits set.
+inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7
+
// atomics
// Use ra0mem to disallow R0 as base.
inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); }
inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
// Instructions for adjusting thread priority
// for simultaneous multithreading (SMT) on POWER5.
@@ -873,10 +920,13 @@
// ra0 version
inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }
// ra0 version
inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); }
--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@
define_pd_global(intx, FLOATPRESSURE, 28);
define_pd_global(intx, FreqInlineSize, 175);
define_pd_global(intx, MinJumpTableSize, 10);
-define_pd_global(intx, INTPRESSURE, 25);
+define_pd_global(intx, INTPRESSURE, 26);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);
--- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -58,7 +58,7 @@
// GC Ergo Flags
define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread.
-define_pd_global(uintx, TypeProfileLevel, 0);
+define_pd_global(uintx, TypeProfileLevel, 111);
// Platform dependent flag handling: flags only defined on this platform.
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
@@ -71,14 +71,26 @@
\
product(uintx, PowerArchitecturePPC64, 0, \
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
- "Power7. Default is 0. CPUs newer than Power7 will be " \
- "recognized as Power7.") \
+ "Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
\
/* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
/* indirect call by a direct call. */ \
product(bool, ReoptimizeCallSequences, true, \
"Reoptimize code-sequences of calls at runtime.") \
\
+ /* Power 8: Configure Data Stream Control Register. */ \
+ product(uint64_t,DSCR_PPC64, (uintx)-1, \
+ "Power8 or later: Specify encoded value for Data Stream Control " \
+ "Register") \
+ product(uint64_t,DSCR_DPFD_PPC64, 8, \
+ "Power8 or later: DPFD (default prefetch depth) value of the " \
+ "Data Stream Control Register." \
+ " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
+ product(uint64_t,DSCR_URG_PPC64, 8, \
+ "Power8 or later: URG (depth attainment urgency) value of the " \
+ "Data Stream Control Register." \
+ " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
+ \
product(bool, UseLoadInstructionsForStackBangingPPC64, false, \
"Use load instructions for stack banging.") \
\
@@ -121,6 +133,41 @@
\
product(bool, ZapMemory, false, "Write 0x0101... to empty memory." \
" Use this to ease debugging.") \
-
+ \
+ /* Use Restricted Transactional Memory for lock eliding */ \
+ product(bool, UseRTMLocking, false, \
+ "Enable RTM lock eliding for inflated locks in compiled code") \
+ \
+ experimental(bool, UseRTMForStackLocks, false, \
+ "Enable RTM lock eliding for stack locks in compiled code") \
+ \
+ product(bool, UseRTMDeopt, false, \
+ "Perform deopt and recompilation based on RTM abort ratio") \
+ \
+ product(uintx, RTMRetryCount, 5, \
+ "Number of RTM retries on lock abort or busy") \
+ \
+ experimental(intx, RTMSpinLoopCount, 100, \
+ "Spin count for lock to become free before RTM retry") \
+ \
+ experimental(intx, RTMAbortThreshold, 1000, \
+ "Calculate abort ratio after this number of aborts") \
+ \
+ experimental(intx, RTMLockingThreshold, 10000, \
+ "Lock count at which to do RTM lock eliding without " \
+ "abort ratio calculation") \
+ \
+ experimental(intx, RTMAbortRatio, 50, \
+ "Lock abort ratio at which to stop use RTM lock eliding") \
+ \
+ experimental(intx, RTMTotalCountIncrRate, 64, \
+ "Increment total RTM attempted lock count once every n times") \
+ \
+ experimental(intx, RTMLockingCalculationDelay, 0, \
+ "Number of milliseconds to wait before start calculating aborts " \
+ "for RTM locking") \
+ \
+ experimental(bool, UseRTMXendForLockBusy, true, \
+ "Use RTM Xend instead of Xabort when lock busy") \
#endif // CPU_PPC_VM_GLOBALS_PPC_HPP
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -446,7 +446,7 @@
}
// Load object from cpool->resolved_references(index).
-void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
+void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) {
assert_different_registers(result, index);
get_constant_pool(result);
@@ -469,7 +469,7 @@
#endif
// Add in the index.
add(result, tmp, result);
- load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
+ load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null);
}
// Generate a subtype check: branch to ok_is_subtype if sub_klass is
@@ -876,7 +876,6 @@
// If condition is true we are done and hence we can store 0 in the displaced
// header indicating it is a recursive lock.
bne(CCR0, slow_case);
- release();
std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
b(done);
@@ -1861,7 +1860,7 @@
const Register mdp = tmp1;
add(mdp, tmp1, R28_mdx);
- // Pffset of the current profile entry to update.
+ // Offset of the current profile entry to update.
const Register entry_offset = tmp2;
// entry_offset = array len in number of cells
ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp);
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -85,7 +85,7 @@
Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);
// Load object from cpool->resolved_references(index).
- void load_resolved_reference_at_index(Register result, Register index);
+ void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL);
void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1);
void load_receiver(Register Rparam_count, Register Rrecv_dst);
--- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,4 +47,4 @@
}
#endif
-#endif // CPU_PPC_VM_INTERPRETER_PPC_PP
+#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1455,7 +1455,7 @@
// Several special cases exist to avoid that unnecessary information is generated.
//
void MacroAssembler::cmpxchgd(ConditionRegister flag,
- Register dest_current_value, Register compare_value, Register exchange_value,
+ Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
Register addr_base, int semantics, bool cmpxchgx_hint,
Register int_flag_success, Label* failed_ext, bool contention_hint) {
Label retry;
@@ -1465,7 +1465,7 @@
// Save one branch if result is returned via register and result register is different from the other ones.
bool use_result_reg = (int_flag_success!=noreg);
- bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
+ bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
int_flag_success!=exchange_value && int_flag_success!=addr_base);
assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
@@ -1481,7 +1481,7 @@
// Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
if (contention_hint) { // Don't try to reserve if cmp fails.
ld(dest_current_value, 0, addr_base);
- cmpd(flag, dest_current_value, compare_value);
+ cmpd(flag, compare_value, dest_current_value);
bne(flag, failed);
}
@@ -1489,7 +1489,7 @@
bind(retry);
ldarx(dest_current_value, addr_base, cmpxchgx_hint);
- cmpd(flag, dest_current_value, compare_value);
+ cmpd(flag, compare_value, dest_current_value);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(flag, failed);
} else {
@@ -1873,7 +1873,6 @@
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
- fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@@ -1909,7 +1908,6 @@
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
- fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@@ -1946,7 +1944,6 @@
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
- fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@@ -1987,9 +1984,371 @@
beq(cr_reg, done);
}
+// TM on PPC64.
+void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) { // 64-bit ldarx/stdcx_ retry loop: stores (*addr + simm16) back to addr; result holds the stored sum. CCR0 is killed.
+ Label retry;
+ bind(retry);
+ ldarx(result, addr, /*hint*/ false);
+ addi(result, result, simm16);
+ stdcx_(result, addr);
+ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+ bne_predict_not_taken(CCR0, retry); // stdcx_ sets CCR0; loop again on not-equal
+ } else {
+ bne( CCR0, retry); // stdcx_ sets CCR0; loop again on not-equal
+ }
+}
+
+void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) { // 32-bit lwarx/stwcx_ retry loop: ORs uimm16 into the word at addr; result holds the stored value. CCR0 is killed.
+ Label retry;
+ bind(retry);
+ lwarx(result, addr, /*hint*/ false);
+ ori(result, result, uimm16);
+ stwcx_(result, addr);
+ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+ bne_predict_not_taken(CCR0, retry); // stwcx_ sets CCR0; loop again on not-equal
+ } else {
+ bne( CCR0, retry); // stwcx_ sets CCR0; loop again on not-equal
+ }
+}
+
+#if INCLUDE_RTM_OPT
+
+// Update rtm_counters based on abort status: always bumps the counter at abort_count_offset(); with PrintPreciseRTMLockingStatistics it also bumps one abortX counter per matching failure bit.
+// input: abort_status (only read here; the caller has already done the mftexasr)
+// rtm_counters (RTMLockingCounters*); used as scratch inside and restored before returning. R0 is killed, CCR0 too when precise statistics are on.
+void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
+ // Mapping to keep PreciseRTMLockingStatistics similar to x86.
+ // x86 ppc (! means inverted, ? means not the same)
+ // 0 31 Set if abort caused by XABORT instruction.
+ // 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
+ // 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
+ // 3 10 Set if an internal buffer overflowed.
+ // 4 ?12 Set if a debug breakpoint was hit.
+ // 5 ?32 Set if an abort occurred during execution of a nested transaction.
+ const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
+ Assembler::tm_failure_persistent, // inverted: transient
+ Assembler::tm_trans_cf,
+ Assembler::tm_footprint_of,
+ Assembler::tm_non_trans_cf,
+ Assembler::tm_suspended};
+ const bool tm_failure_inv[] = {false, true, false, false, false, false};
+ assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
+
+ const Register addr_Reg = R0;
+ // Keep track of offset to where rtm_counters_Reg had pointed to.
+ int counters_offs = RTMLockingCounters::abort_count_offset();
+ addi(addr_Reg, rtm_counters_Reg, counters_offs);
+ const Register temp_Reg = rtm_counters_Reg; // rtm_counters_Reg doubles as scratch; it is rebuilt from addr_Reg at the end
+
+ //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+ ldx(temp_Reg, addr_Reg);
+ addi(temp_Reg, temp_Reg, 1);
+ stdx(temp_Reg, addr_Reg);
+
+ if (PrintPreciseRTMLockingStatistics) {
+ int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;
+
+ //mftexasr(abort_status); done by caller
+ for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
+ counters_offs += counters_offs_delta;
+ li(temp_Reg, counters_offs_delta); // can't use addi with R0
+ add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
+ counters_offs_delta = sizeof(uintx); // after the first step the counters are one uintx apart
+
+ Label check_abort;
+ rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0); // isolate failure bit i; the record form sets CCR0 for the branch below
+ if (tm_failure_inv[i]) {
+ bne(CCR0, check_abort); // inverted mapping: count only when the bit is clear
+ } else {
+ beq(CCR0, check_abort); // normal mapping: count only when the bit is set
+ }
+ //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+ ldx(temp_Reg, addr_Reg);
+ addi(temp_Reg, temp_Reg, 1);
+ stdx(temp_Reg, addr_Reg);
+ bind(check_abort);
+ }
+ }
+ li(temp_Reg, -counters_offs); // can't use addi with R0
+ add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
+}
+
+// Branch to brLabel if ((value read by mftb) & (count-1)) != 0; count is expected to be 2^n.
+// tmp and CCR0 are killed
+void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
+ mftb(tmp);
+ andi_(tmp, tmp, count-1);
+ bne(CCR0, brLabel);
+}
+
+// Perform abort ratio calculation, set no_rtm bit if high ratio; otherwise set the UseRTM bit once total_count reaches RTMLockingThreshold / RTMTotalCountIncrRate.
+// input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED; R0 and CCR0 are killed as well. method_data may be NULL (then rtm_state is not written).
+void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
+ RTMLockingCounters* rtm_counters,
+ Metadata* method_data) {
+ Label L_done, L_check_always_rtm1, L_check_always_rtm2;
+
+ if (RTMLockingCalculationDelay > 0) {
+ // Delay calculation.
+ ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr()); // overwrites rtm_counters_Reg with the flag, hence the reload below
+ cmpdi(CCR0, rtm_counters_Reg, 0);
+ beq(CCR0, L_done);
+ load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+ }
+ // Abort ratio calculation only if abort_count > RTMAbortThreshold.
+ // Aborted transactions = abort_count * 100
+ // All transactions = total_count * RTMTotalCountIncrRate
+ // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
+ ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
+ cmpdi(CCR0, R0, RTMAbortThreshold);
+ blt(CCR0, L_check_always_rtm2); // rtm_counters_Reg is still intact here, so skip the reload
+ mulli(R0, R0, 100);
+
+ const Register tmpReg = rtm_counters_Reg; // alias: the counters pointer is overwritten from here on
+ ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+ mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
+ mulli(tmpReg, tmpReg, RTMAbortRatio);
+ cmpd(CCR0, R0, tmpReg);
+ blt(CCR0, L_check_always_rtm1); // jump to reload
+ if (method_data != NULL) {
+ // Set rtm_state to "no rtm" in MDO.
+ // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
+ // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
+ load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+ atomic_ori_int(R0, tmpReg, NoRTM);
+ }
+ b(L_done);
+
+ bind(L_check_always_rtm1);
+ load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+ bind(L_check_always_rtm2);
+ ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+ cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
+ blt(CCR0, L_done);
+ if (method_data != NULL) {
+ // Set rtm_state to "always rtm" in MDO.
+ // Not using a metadata relocation. See above.
+ load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+ atomic_ori_int(R0, tmpReg, UseRTM);
+ }
+ bind(L_done);
+}
+
+// Update counters and, when profile_rtm is true, perform abort ratio calculation.
+// input: abort_status_Reg; temp_Reg is scratch (loaded with the rtm_counters address) and must differ from abort_status_Reg. R0 is used as a temp for the constant load.
+void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+ RTMLockingCounters* rtm_counters,
+ Metadata* method_data,
+ bool profile_rtm) {
+
+ assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+ // Update rtm counters based on state at abort.
+ // Reads abort_status_Reg, updates flags.
+ assert_different_registers(abort_status_Reg, temp_Reg);
+ load_const_optimized(temp_Reg, (address)rtm_counters, R0);
+ rtm_counters_update(abort_status_Reg, temp_Reg);
+ if (profile_rtm) {
+ assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); // NOTE(review): repeats the check made at function entry
+ rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
+ }
+}
+
+// Retry on abort if abort's status indicates non-persistent failure.
+// inputs: retry_count_Reg
+// : abort_status_Reg
+// output: retry_count_Reg decremented by 1 (not touched when the failure is persistent). R0 and CCR0 are killed. checkRetry, if given, is bound at the decrement, i.e. after the persistence test.
+void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
+ Label& retryLabel, Label* checkRetry) {
+ Label doneRetry;
+ rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0); // test the tm_failure_persistent bit of the abort status
+ bne(CCR0, doneRetry); // bit set: persistent failure, don't retry
+ if (checkRetry) { bind(*checkRetry); }
+ addic_(retry_count_Reg, retry_count_Reg, -1);
+ blt(CCR0, doneRetry); // count went negative: no retries left
+ smt_yield(); // Can't use wait(). No permission (SIGILL).
+ b(retryLabel);
+ bind(doneRetry);
+}
+
+// Spin and retry if lock is busy.
+// inputs: owner_addr_Reg (address polled while spinning; spinning stops once the doubleword there is 0)
+// : retry_count_Reg
+// output: retry_count_Reg decremented by 1
+// CTR, R0 and CCR0 are killed
+void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
+ Label SpinLoop, doneRetry;
+ addic_(retry_count_Reg, retry_count_Reg, -1);
+ blt(CCR0, doneRetry); // count went negative: no retries left, fall out without branching to retryLabel
+ li(R0, RTMSpinLoopCount);
+ mtctr(R0);
+
+ bind(SpinLoop);
+ smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
+ bdz(retryLabel); // spin budget (CTR) used up: go retry anyway
+ ld(R0, 0, owner_addr_Reg);
+ cmpdi(CCR0, R0, 0);
+ bne(CCR0, SpinLoop); // still non-zero: keep spinning
+ b(retryLabel);
+
+ bind(doneRetry);
+}
+
+// Use RTM for normal stack locks. Branches to IsInflated if the monitor bit is set in mark_word, to DONE_LABEL (without ending the transaction) if the mark word reloaded inside the transaction is unlocked; otherwise falls through.
+// Input: obj (object to lock); mark_word must already hold obj's mark word (it is tested before being reloaded). mark_word, tmp, R0 and CCR0 are killed; retry_on_abort_count_Reg is written when RTMRetryCount > 0.
+void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
+ Register obj, Register mark_word, Register tmp,
+ Register retry_on_abort_count_Reg,
+ RTMLockingCounters* stack_rtm_counters,
+ Metadata* method_data, bool profile_rtm,
+ Label& DONE_LABEL, Label& IsInflated) {
+ assert(UseRTMForStackLocks, "why call this otherwise?");
+ assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+ Label L_rtm_retry, L_decrement_retry, L_on_abort;
+
+ if (RTMRetryCount > 0) {
+ load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+ bind(L_rtm_retry);
+ }
+ andi_(R0, mark_word, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
+ bne(CCR0, IsInflated);
+
+ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+ Label L_noincrement;
+ if (RTMTotalCountIncrRate > 1) {
+ branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement);
+ }
+ assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
+ load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
+ //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
+ ldx(mark_word, tmp);
+ addi(mark_word, mark_word, 1);
+ stdx(mark_word, tmp);
+ bind(L_noincrement);
+ }
+ tbegin_();
+ beq(CCR0, L_on_abort); // CCR0 comes from tbegin_: equal means go to the abort handling
+ ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
+ andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+ cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
+ beq(flag, DONE_LABEL); // all done if unlocked
+
+ if (UseRTMXendForLockBusy) {
+ tend_();
+ b(L_decrement_retry);
+ } else {
+ tabort_();
+ }
+ bind(L_on_abort);
+ const Register abort_status_Reg = tmp;
+ mftexasr(abort_status_Reg);
+ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+ rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
+ }
+ ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
+ if (RTMRetryCount > 0) {
+ // Retry on lock abort if abort status is not permanent.
+ rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
+ } else {
+ bind(L_decrement_retry);
+ }
+}
+
+// Use RTM for inflating locks. Branches to DONE_LABEL (without ending the transaction) if the owner field read inside the transaction is 0; otherwise falls back to a cmpxchgd on the owner field.
+// inputs: obj (object to lock)
+// mark_word (current header - KILLED)
+// boxReg (on-stack box address (displaced header location) - KILLED); R0 and CCR0 are killed too, and both retry count registers are written when RTMRetryCount > 0.
+void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
+ Register obj, Register mark_word, Register boxReg,
+ Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
+ RTMLockingCounters* rtm_counters,
+ Metadata* method_data, bool profile_rtm,
+ Label& DONE_LABEL) {
+ assert(UseRTMLocking, "why call this otherwise?");
+ Label L_rtm_retry, L_decrement_retry, L_on_abort;
+ // Clean monitor_value bit to get valid pointer.
+ int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+ // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
+ std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
+ const Register tmpReg = boxReg; // boxReg is reused as scratch after the store above
+ const Register owner_addr_Reg = mark_word; // mark_word is overwritten with the address of the owner field
+ addi(owner_addr_Reg, mark_word, owner_offset);
+
+ if (RTMRetryCount > 0) {
+ load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
+ load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
+ bind(L_rtm_retry);
+ }
+ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+ Label L_noincrement;
+ if (RTMTotalCountIncrRate > 1) {
+ branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement);
+ }
+ assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+ load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
+ //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
+ ldx(tmpReg, R0);
+ addi(tmpReg, tmpReg, 1);
+ stdx(tmpReg, R0);
+ bind(L_noincrement);
+ }
+ tbegin_();
+ beq(CCR0, L_on_abort); // CCR0 comes from tbegin_: equal means go to the abort handling
+ // We don't reload mark word. Will only be reset at safepoint.
+ ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
+ cmpdi(flag, R0, 0);
+ beq(flag, DONE_LABEL);
+
+ if (UseRTMXendForLockBusy) {
+ tend_();
+ b(L_decrement_retry);
+ } else {
+ tabort_();
+ }
+ bind(L_on_abort);
+ const Register abort_status_Reg = tmpReg;
+ mftexasr(abort_status_Reg);
+ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+ rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
+ // Restore owner_addr_Reg (it was handed to rtm_profiling as the temp register)
+ ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
+#ifdef ASSERT
+ andi_(R0, mark_word, markOopDesc::monitor_value);
+ asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
+#endif
+ addi(owner_addr_Reg, mark_word, owner_offset);
+ }
+ if (RTMRetryCount > 0) {
+ // Retry on lock abort if abort status is not permanent.
+ rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+ }
+
+ // Appears unlocked - try to swing _owner from null to non-null.
+ cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
+ MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+ MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
+
+ if (RTMRetryCount > 0) {
+ // success done else retry
+ b(DONE_LABEL);
+ bind(L_decrement_retry);
+ // Spin and retry if lock is busy.
+ rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
+ } else {
+ bind(L_decrement_retry);
+ }
+}
+
+#endif // INCLUDE_RTM_OPT
+
// "The box" is the space on the stack where we copy the object mark.
void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
- Register temp, Register displaced_header, Register current_header) {
+ Register temp, Register displaced_header, Register current_header,
+ bool try_bias,
+ RTMLockingCounters* rtm_counters,
+ RTMLockingCounters* stack_rtm_counters,
+ Metadata* method_data,
+ bool use_rtm, bool profile_rtm) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(flag != CCR0, "bad condition register");
Label cont;
@@ -2006,10 +2365,18 @@
return;
}
- if (UseBiasedLocking) {
+ if (try_bias) {
biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
}
+#if INCLUDE_RTM_OPT
+ if (UseRTMForStackLocks && use_rtm) {
+ rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
+ stack_rtm_counters, method_data, profile_rtm,
+ cont, object_has_monitor);
+ }
+#endif // INCLUDE_RTM_OPT
+
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
@@ -2066,14 +2433,22 @@
bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
- //
+
+#if INCLUDE_RTM_OPT
+ // Use the same RTM locking code in 32- and 64-bit VM.
+ if (use_rtm) {
+ rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
+ rtm_counters, method_data, profile_rtm, cont);
+ } else {
+#endif // INCLUDE_RTM_OPT
+
// Try to CAS m->owner from NULL to current thread.
addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
li(displaced_header, 0);
// CmpxchgX sets flag to cmpX(current, displaced).
cmpxchgd(/*flag=*/flag,
/*current_value=*/current_header,
- /*compare_value=*/displaced_header,
+ /*compare_value=*/(intptr_t)0,
/*exchange_value=*/R16_thread,
/*where=*/temp,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
@@ -2095,6 +2470,10 @@
//asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
// "monitor->OwnerIsThread shouldn't be 0", -1);
# endif
+
+#if INCLUDE_RTM_OPT
+ } // use_rtm()
+#endif
}
bind(cont);
@@ -2103,7 +2482,8 @@
}
void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
- Register temp, Register displaced_header, Register current_header) {
+ Register temp, Register displaced_header, Register current_header,
+ bool try_bias, bool use_rtm) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(flag != CCR0, "bad condition register");
Label cont;
@@ -2115,10 +2495,24 @@
return;
}
- if (UseBiasedLocking) {
+ if (try_bias) {
biased_locking_exit(flag, oop, current_header, cont);
}
+#if INCLUDE_RTM_OPT
+ if (UseRTMForStackLocks && use_rtm) {
+ assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+ Label L_regular_unlock;
+ ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword
+ andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+ cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
+ bne(flag, L_regular_unlock); // else RegularLock
+ tend_(); // otherwise end...
+ b(cont); // ... and we're done
+ bind(L_regular_unlock);
+ }
+#endif
+
// Find the lock address and load the displaced header from the stack.
ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
@@ -2129,13 +2523,12 @@
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
+ RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
- andi(temp, current_header, markOopDesc::monitor_value);
- cmpdi(flag, temp, 0);
- bne(flag, object_has_monitor);
+ andi_(R0, current_header, markOopDesc::monitor_value);
+ bne(CCR0, object_has_monitor);
}
-
// Check if it is still a light weight lock, this is is true if we see
// the stack address of the basicLock in the markOop of the object.
// Cmpxchg sets flag to cmpd(current_header, box).
@@ -2158,6 +2551,20 @@
bind(object_has_monitor);
addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
+
+ // It's inflated.
+#if INCLUDE_RTM_OPT
+ if (use_rtm) {
+ Label L_regular_inflated_unlock;
+ // Clean monitor_value bit to get valid pointer
+ cmpdi(flag, temp, 0);
+ bne(flag, L_regular_inflated_unlock);
+ tend_();
+ b(cont);
+ bind(L_regular_inflated_unlock);
+ }
+#endif
+
ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
@@ -2441,6 +2848,8 @@
// oop_result
// R16_thread->in_bytes(JavaThread::vm_result_offset())
+ verify_thread();
+
ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
li(R0, 0);
std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
@@ -2462,26 +2871,24 @@
std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
}
-
-void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+Register MacroAssembler::encode_klass_not_null(Register dst, Register src) { // Returns the register holding the encoded klass: dst if a base subtraction or shift was emitted, otherwise the unmodified input register.
Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
if (Universe::narrow_klass_base() != 0) {
// Use dst as temp if it is free.
- load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
- sub(dst, current, R0);
+ sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0);
current = dst;
}
if (Universe::narrow_klass_shift() != 0) {
srdi(dst, current, Universe::narrow_klass_shift());
current = dst;
}
- mr_if_needed(dst, current); // Move may be required.
+ return current; // No move into dst any more: callers must use the returned register.
}
void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
if (UseCompressedClassPointers) {
- encode_klass_not_null(ck, klass);
- stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
+ Register compressedKlass = encode_klass_not_null(ck, klass);
+ stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
} else {
std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
}
@@ -2514,8 +2921,7 @@
sldi(shifted_src, src, Universe::narrow_klass_shift());
}
if (Universe::narrow_klass_base() != 0) {
- load_const(R0, Universe::narrow_klass_base());
- add(dst, shifted_src, R0);
+ add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);
}
}
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
#define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
#include "asm/assembler.hpp"
+#include "runtime/rtmLocking.hpp"
#include "utilities/macros.hpp"
// MacroAssembler extends Assembler by a few frequently used macros.
@@ -432,8 +433,8 @@
int semantics, bool cmpxchgx_hint = false,
Register int_flag_success = noreg, bool contention_hint = false);
void cmpxchgd(ConditionRegister flag,
- Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
- int semantics, bool cmpxchgx_hint = false,
+ Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
+ Register addr_base, int semantics, bool cmpxchgx_hint = false,
Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);
// interface method calling
@@ -506,8 +507,42 @@
// biased locking exit case failed.
void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
- void compiler_fast_lock_object( ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
- void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
+ void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
+ void atomic_ori_int(Register addr, Register result, int uimm16);
+
+#if INCLUDE_RTM_OPT
+ void rtm_counters_update(Register abort_status, Register rtm_counters);
+ void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
+ void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
+ Metadata* method_data);
+ void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+ RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
+ void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
+ Label& retryLabel, Label* checkRetry = NULL);
+ void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
+ void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
+ Register retry_on_abort_count,
+ RTMLockingCounters* stack_rtm_counters,
+ Metadata* method_data, bool profile_rtm,
+ Label& DONE_LABEL, Label& IsInflated);
+ void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
+ Register retry_on_busy_count, Register retry_on_abort_count,
+ RTMLockingCounters* rtm_counters,
+ Metadata* method_data, bool profile_rtm,
+ Label& DONE_LABEL);
+#endif
+
+ void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
+ Register tmp1, Register tmp2, Register tmp3,
+ bool try_bias = UseBiasedLocking,
+ RTMLockingCounters* rtm_counters = NULL,
+ RTMLockingCounters* stack_rtm_counters = NULL,
+ Metadata* method_data = NULL,
+ bool use_rtm = false, bool profile_rtm = false);
+
+ void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
+ Register tmp1, Register tmp2, Register tmp3,
+ bool try_bias = UseBiasedLocking, bool use_rtm = false);
// Support for serializing memory accesses between threads
void serialize_memory(Register thread, Register tmp1, Register tmp2);
@@ -576,7 +611,7 @@
Register tmp = noreg);
// Null allowed.
- inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg);
+ inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL);
// Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
// src == d allowed.
@@ -593,7 +628,7 @@
void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
static int instr_size_for_decode_klass_not_null();
void decode_klass_not_null(Register dst, Register src = noreg);
- void encode_klass_not_null(Register dst, Register src = noreg);
+ Register encode_klass_not_null(Register dst, Register src = noreg);
// Load common heap base into register.
void reinit_heapbase(Register d, Register tmp = noreg);
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -333,19 +333,29 @@
}
}
-inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) {
+inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) { // is_null (optional): if non-NULL, a branch to it is emitted for a loaded value of 0, and CCR0 is killed.
if (UseCompressedOops) {
lwz(d, offs, s1);
- decode_heap_oop(d);
+ if (is_null != NULL) {
+ cmpwi(CCR0, d, 0);
+ beq(CCR0, *is_null);
+ decode_heap_oop_not_null(d); // null was branched away above, so the not-null decode is used
+ } else {
+ decode_heap_oop(d);
+ }
} else {
ld(d, offs, s1);
+ if (is_null != NULL) {
+ cmpdi(CCR0, d, 0);
+ beq(CCR0, *is_null);
+ }
}
}
inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) {
Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided.
if (Universe::narrow_oop_base_overlaps()) {
- sub(d, current, R30);
+ sub_const_optimized(d, current, Universe::narrow_oop_base(), R0);
current = d;
}
if (Universe::narrow_oop_shift() != 0) {
@@ -358,7 +368,7 @@
inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
Universe::narrow_oop_shift() != 0) {
- mr(d, R30);
+ load_const_optimized(d, Universe::narrow_oop_base(), R0);
rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift());
return d;
}
@@ -369,7 +379,7 @@
current = d;
}
if (Universe::narrow_oop_base() != NULL) {
- add(d, current, R30);
+ add_const_optimized(d, current, Universe::narrow_oop_base(), R0);
current = d;
}
return current; // Decoded oop is in this register.
@@ -377,11 +387,19 @@
inline void MacroAssembler::decode_heap_oop(Register d) {
Label isNull;
+ bool use_isel = false; // set when the null case is handled with isel_0 after decoding instead of a branch around it
if (Universe::narrow_oop_base() != NULL) {
cmpwi(CCR0, d, 0);
- beq(CCR0, isNull);
+ if (VM_Version::has_isel()) {
+ use_isel = true;
+ } else {
+ beq(CCR0, isNull);
+ }
}
decode_heap_oop_not_null(d);
+ if (use_isel) {
+ isel_0(d, CCR0, Assembler::equal); // presumably selects 0 into d when the compare above was equal (null narrow oop) -- confirm against isel_0
+ }
bind(isNull);
}
--- a/hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,9 +27,6 @@
// These definitions are inlined into class MethodHandles.
// Adapters
-//static unsigned int adapter_code_size() {
-// return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0);
-//}
enum /* platform_dependent_constants */ {
adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
};
@@ -45,7 +42,9 @@
static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
Register temp_reg, Register temp2_reg) {
- Unimplemented();
+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), // delegate to verify_klass with the java_lang_invoke_MethodHandle well-known klass
+ temp_reg, temp2_reg,
+ "reference is a MH");
}
static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
--- a/hotspot/src/cpu/ppc/vm/ppc.ad Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad Wed Apr 15 12:44:56 2015 +0200
@@ -447,8 +447,8 @@
R26,
R27,
R28,
-/*R29*/ // global TOC
-/*R30*/ // Narrow Oop Base
+/*R29,*/ // global TOC
+ R30,
R31
);
@@ -484,58 +484,11 @@
R26,
R27,
R28,
-/*R29*/
-/*R30*/ // Narrow Oop Base
+/*R29,*/
+ R30,
R31
);
-// Complement-required-in-pipeline operands for narrow oops.
-reg_class bits32_reg_ro_not_complement (
-/*R0*/ // R0
- R1, // SP
- R2, // TOC
- R3,
- R4,
- R5,
- R6,
- R7,
- R8,
- R9,
- R10,
- R11,
- R12,
-/*R13,*/ // system thread id
- R14,
- R15,
- R16, // R16_thread
- R17,
- R18,
- R19,
- R20,
- R21,
- R22,
-/*R23,
- R24,
- R25,
- R26,
- R27,
- R28,*/
-/*R29,*/ // TODO: let allocator handle TOC!!
-/*R30,*/
- R31
-);
-
-// Complement-required-in-pipeline operands for narrow oops.
-// See 64-bit declaration.
-reg_class bits32_reg_ro_complement (
- R23,
- R24,
- R25,
- R26,
- R27,
- R28
-);
-
reg_class rscratch1_bits32_reg(R11);
reg_class rscratch2_bits32_reg(R12);
reg_class rarg1_bits32_reg(R3);
@@ -591,8 +544,8 @@
R26_H, R26,
R27_H, R27,
R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+ R30_H, R30,
R31_H, R31
);
@@ -629,8 +582,8 @@
R26_H, R26,
R27_H, R27,
R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+ R30_H, R30,
R31_H, R31
);
@@ -667,8 +620,8 @@
R26_H, R26,
R27_H, R27,
R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+ R30_H, R30,
R31_H, R31
);
@@ -704,64 +657,11 @@
R26_H, R26,
R27_H, R27,
R28_H, R28,
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
+/*R29_H, R29,*/ // TODO: let allocator handle TOC!!
+ R30_H, R30,
R31_H, R31
);
-// Complement-required-in-pipeline operands.
-reg_class bits64_reg_ro_not_complement (
-/*R0_H, R0*/ // R0
- R1_H, R1, // SP
- R2_H, R2, // TOC
- R3_H, R3,
- R4_H, R4,
- R5_H, R5,
- R6_H, R6,
- R7_H, R7,
- R8_H, R8,
- R9_H, R9,
- R10_H, R10,
- R11_H, R11,
- R12_H, R12,
-/*R13_H, R13*/ // system thread id
- R14_H, R14,
- R15_H, R15,
- R16_H, R16, // R16_thread
- R17_H, R17,
- R18_H, R18,
- R19_H, R19,
- R20_H, R20,
- R21_H, R21,
- R22_H, R22,
-/*R23_H, R23,
- R24_H, R24,
- R25_H, R25,
- R26_H, R26,
- R27_H, R27,
- R28_H, R28,*/
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
- R31_H, R31
-);
-
-// Complement-required-in-pipeline operands.
-// This register mask is used for the trap instructions that implement
-// the null checks on AIX. The trap instruction first computes the
-// complement of the value it shall trap on. Because of this, the
-// instruction can not be scheduled in the same cycle as an other
-// instruction reading the normal value of the same register. So we
-// force the value to check into 'bits64_reg_ro_not_complement'
-// and then copy it to 'bits64_reg_ro_complement' for the trap.
-reg_class bits64_reg_ro_complement (
- R23_H, R23,
- R24_H, R24,
- R25_H, R25,
- R26_H, R26,
- R27_H, R27,
- R28_H, R28
-);
-
// ----------------------------
// Special Class for Condition Code Flags Register
@@ -777,6 +677,17 @@
CCR7
);
+reg_class int_flags_ro(
+ CCR0,
+ CCR1,
+ CCR2,
+ CCR3,
+ CCR4,
+ CCR5,
+ CCR6,
+ CCR7
+);
+
reg_class int_flags_CR0(CCR0);
reg_class int_flags_CR1(CCR1);
reg_class int_flags_CR6(CCR6);
@@ -2876,7 +2787,7 @@
// Use release_store for card-marking to ensure that previous
// oop-stores are visible before the card-mark change.
- enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{
+ enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
// FIXME: Implement this as a cmove and use a fixed condition code
// register which is written on every transition to compiled code,
@@ -2897,8 +2808,8 @@
// Check CMSCollectorCardTableModRefBSExt::_requires_release and do the
// StoreStore barrier conditionally.
__ lwz(R0, 0, $releaseFieldAddr$$Register);
- __ cmpwi(CCR0, R0, 0);
- __ beq_predict_taken(CCR0, skip_storestore);
+ __ cmpwi($crx$$CondRegister, R0, 0);
+ __ beq_predict_taken($crx$$CondRegister, skip_storestore);
#endif
__ li(R0, 0);
__ membar(Assembler::StoreStore);
@@ -3108,7 +3019,7 @@
nodes->push(n2);
%}
- enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{
+ enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
MacroAssembler _masm(&cbuf);
@@ -3123,7 +3034,7 @@
__ bind(done);
%}
- enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{
+ enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
MacroAssembler _masm(&cbuf);
@@ -3269,7 +3180,7 @@
__ bind(done);
%}
- enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{
+ enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
MacroAssembler _masm(&cbuf);
@@ -3281,7 +3192,7 @@
__ bind(done);
%}
- enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{
+ enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
MacroAssembler _masm(&cbuf);
@@ -3309,7 +3220,7 @@
l);
%}
- enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+ enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
// The scheduler doesn't know about branch shortening, so we set the opcode
// to ppc64Opcode_bc in order to hide this detail from the scheduler.
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -3341,7 +3252,7 @@
%}
// Branch used with Power6 scheduling (can be shortened without changing the node).
- enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+ enc_class enc_bc_short_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
// The scheduler doesn't know about branch shortening, so we set the opcode
// to ppc64Opcode_bc in order to hide this detail from the scheduler.
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -4700,6 +4611,15 @@
interface(REG_INTER);
%}
+operand flagsRegSrc() %{
+ constraint(ALLOC_IN_RC(int_flags_ro));
+ match(RegFlags);
+ match(flagsReg);
+ match(flagsRegCR0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Condition Code Flag Register CR0
operand flagsRegCR0() %{
constraint(ALLOC_IN_RC(int_flags_CR0));
@@ -4783,6 +4703,13 @@
predicate(false /* TODO: PPC port MatchDecodeNodes*/);
constraint(ALLOC_IN_RC(bits32_reg_ro));
match(DecodeN reg);
+ format %{ "$reg" %}
+ interface(REG_INTER)
+%}
+
+operand iRegN2P_klass(iRegNsrc reg) %{
+ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+ constraint(ALLOC_IN_RC(bits32_reg_ro));
match(DecodeNKlass reg);
format %{ "$reg" %}
interface(REG_INTER)
@@ -4839,6 +4766,19 @@
predicate(false /* TODO: PPC port MatchDecodeNodes*/);
constraint(ALLOC_IN_RC(bits64_reg_ro));
match(DecodeN reg);
+ op_cost(100);
+ format %{ "[$reg]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0x0);
+ scale(0x0);
+ disp(0x0);
+ %}
+%}
+
+operand indirectNarrow_klass(iRegNsrc reg) %{
+ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+ constraint(ALLOC_IN_RC(bits64_reg_ro));
match(DecodeNKlass reg);
op_cost(100);
format %{ "[$reg]" %}
@@ -4855,6 +4795,19 @@
predicate(false /* TODO: PPC port MatchDecodeNodes*/);
constraint(ALLOC_IN_RC(bits64_reg_ro));
match(AddP (DecodeN reg) offset);
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0x0);
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
+ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+ constraint(ALLOC_IN_RC(bits64_reg_ro));
match(AddP (DecodeNKlass reg) offset);
op_cost(100);
format %{ "[$reg + $offset]" %}
@@ -4871,6 +4824,19 @@
predicate(false /* TODO: PPC port MatchDecodeNodes*/);
constraint(ALLOC_IN_RC(bits64_reg_ro));
match(AddP (DecodeN reg) offset);
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0x0);
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
+ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+ constraint(ALLOC_IN_RC(bits64_reg_ro));
match(AddP (DecodeNKlass reg) offset);
op_cost(100);
format %{ "[$reg + $offset]" %}
@@ -4998,9 +4964,9 @@
// encoding and format. The classic case of this is memory operands.
// Indirect is not included since its use is limited to Compare & Swap.
-opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
+opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
// Memory operand where offsets are 4-aligned. Required for ld, std.
-opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
+opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
opclass indirectMemory(indirect, indirectNarrow);
// Special opclass for I and ConvL2I.
@@ -5009,7 +4975,7 @@
// Operand classes to match encode and decode. iRegN_P2N is only used
// for storeN. I have never seen an encode node elsewhere.
opclass iRegN_P2N(iRegNsrc, iRegP2N);
-opclass iRegP_N2P(iRegPsrc, iRegN2P);
+opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
//----------PIPELINE-----------------------------------------------------------
@@ -5593,6 +5559,19 @@
ins_pipe(pipe_class_memory);
%}
+instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
+ match(Set dst (DecodeNKlass (LoadNKlass mem)));
+ // SAPJVM GL 2014-05-21 Differs.
+ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 &&
+ _kids[0]->_leaf->as_Load()->is_unordered());
+ ins_cost(MEMORY_REF_COST);
+
+ format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %}
+ size(4);
+ ins_encode( enc_lwz(dst, mem) );
+ ins_pipe(pipe_class_memory);
+%}
+
// Load Pointer
instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
match(Set dst (LoadP mem));
@@ -5669,8 +5648,9 @@
%}
// Load Float acquire.
-instruct loadF_ac(regF dst, memory mem) %{
+instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
match(Set dst (LoadF mem));
+ effect(TEMP cr0);
ins_cost(3*MEMORY_REF_COST);
format %{ "LFS $dst, $mem \t// acquire\n\t"
@@ -5705,8 +5685,9 @@
%}
// Load Double - aligned acquire.
-instruct loadD_ac(regD dst, memory mem) %{
+instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
match(Set dst (LoadD mem));
+ effect(TEMP cr0);
ins_cost(3*MEMORY_REF_COST);
format %{ "LFD $dst, $mem \t// acquire\n\t"
@@ -6034,11 +6015,10 @@
instruct loadBase(iRegLdst dst) %{
effect(DEF dst);
- format %{ "MR $dst, r30_heapbase" %}
- size(4);
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_or);
- __ mr($dst$$Register, R30);
+ format %{ "LoadConst $dst, heapbase" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ load_const_optimized($dst$$Register, Universe::narrow_oop_base(), R0);
%}
ins_pipe(pipe_class_default);
%}
@@ -6114,7 +6094,7 @@
effect(TEMP src2);
ins_cost(DEFAULT_COST);
- format %{ "ORI $dst, $src1, $src2 \t// narrow klass lo" %}
+ format %{ "ORI $dst, $src1, $src2 \t// narrow klass lo" %}
size(4);
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_ori);
@@ -6563,8 +6543,9 @@
// do a releasing store. For this, it gets the address of
// CMSCollectorCardTableModRefBSExt::_requires_release as input.
// (Using releaseFieldAddr in the match rule is a hack.)
-instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
+instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
match(Set mem (StoreCM mem releaseFieldAddr));
+ effect(TEMP crx);
predicate(false);
ins_cost(MEMORY_REF_COST);
@@ -6572,7 +6553,7 @@
ins_cannot_rematerialize(true);
format %{ "STB #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
- ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
+ ins_encode( enc_cms_card_mark(mem, releaseFieldAddr, crx) );
ins_pipe(pipe_class_memory);
%}
@@ -6589,8 +6570,9 @@
expand %{
immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
iRegLdst releaseFieldAddress;
+ flagsReg crx;
loadConL_Ex(releaseFieldAddress, baseImm);
- storeCM_CMS(mem, releaseFieldAddress);
+ storeCM_CMS(mem, releaseFieldAddress, crx);
%}
%}
@@ -6639,39 +6621,34 @@
predicate(false);
format %{ "SUB $dst, $src, oop_base \t// encode" %}
- size(4);
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_subf);
- __ subf($dst$$Register, R30, $src$$Register);
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ sub_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
%}
ins_pipe(pipe_class_default);
%}
// Conditional sub base.
-instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
// The match rule is needed to make it a 'MachTypeNode'!
match(Set dst (EncodeP (Binary crx src1)));
predicate(false);
- ins_variable_size_depending_on_alignment(true);
-
format %{ "BEQ $crx, done\n\t"
- "SUB $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
+ "SUB $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
"done:" %}
- size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
Label done;
__ beq($crx$$CondRegister, done);
- __ subf($dst$$Register, R30, $src1$$Register);
- // TODO PPC port __ endgroup_if_needed(_size == 12);
+ __ sub_const_optimized($dst$$Register, $src1$$Register, Universe::narrow_oop_base(), R0);
__ bind(done);
%}
ins_pipe(pipe_class_default);
%}
// Power 7 can use isel instruction
-instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
// The match rule is needed to make it a 'MachTypeNode'!
match(Set dst (EncodeP (Binary crx src1)));
predicate(false);
@@ -6777,42 +6754,37 @@
match(Set dst (DecodeN src));
predicate(false);
- format %{ "ADD $dst, $src, R30 \t// DecodeN, add oop base" %}
- size(4);
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_add);
- __ add($dst$$Register, $src$$Register, R30);
+ format %{ "ADD $dst, $src, heapbase \t// DecodeN, add oop base" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
%}
ins_pipe(pipe_class_default);
%}
// conditianal add base for expand
-instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
// The match rule is needed to make it a 'MachTypeNode'!
// NOTICE that the rule is nonsense - we just have to make sure that:
// - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
// - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
- match(Set dst (DecodeN (Binary crx src1)));
+ match(Set dst (DecodeN (Binary crx src)));
predicate(false);
- ins_variable_size_depending_on_alignment(true);
-
format %{ "BEQ $crx, done\n\t"
- "ADD $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
+ "ADD $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
"done:" %}
- size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8);
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
Label done;
__ beq($crx$$CondRegister, done);
- __ add($dst$$Register, $src1$$Register, R30);
- // TODO PPC port __ endgroup_if_needed(_size == 12);
+ __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
__ bind(done);
%}
ins_pipe(pipe_class_default);
%}
-instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
// The match rule is needed to make it a 'MachTypeNode'!
// NOTICE that the rule is nonsense - we just have to make sure that:
// - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
@@ -6888,7 +6860,7 @@
Universe::narrow_oop_base_disjoint());
ins_cost(DEFAULT_COST);
- format %{ "MOV $dst, R30 \t\n"
+ format %{ "MOV $dst, heapbase \t\n"
"RLDIMI $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
postalloc_expand %{
loadBaseNode *n1 = new loadBaseNode();
@@ -6946,7 +6918,7 @@
assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
ra_->set_oop(n_cond_set, true);
-
+
ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -7303,7 +7275,7 @@
//----------Conditional Move---------------------------------------------------
// Cmove using isel.
-instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
predicate(VM_Version::has_isel());
ins_cost(DEFAULT_COST);
@@ -7321,7 +7293,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
predicate(!VM_Version::has_isel());
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7335,7 +7307,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
+instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7349,7 +7321,7 @@
%}
// Cmove using isel.
-instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
predicate(VM_Version::has_isel());
ins_cost(DEFAULT_COST);
@@ -7367,7 +7339,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
predicate(!VM_Version::has_isel());
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7381,7 +7353,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
+instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7395,7 +7367,7 @@
%}
// Cmove using isel.
-instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
predicate(VM_Version::has_isel());
ins_cost(DEFAULT_COST);
@@ -7414,7 +7386,7 @@
%}
// Conditional move for RegN. Only cmov(reg, reg).
-instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
predicate(!VM_Version::has_isel());
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7428,7 +7400,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
+instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7442,7 +7414,7 @@
%}
// Cmove using isel.
-instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
+instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
predicate(VM_Version::has_isel());
ins_cost(DEFAULT_COST);
@@ -7460,7 +7432,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{
+instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
predicate(!VM_Version::has_isel());
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7474,7 +7446,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{
+instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7487,7 +7459,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{
+instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7509,7 +7481,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{
+instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7542,8 +7514,9 @@
// Mem_ptr must be a memory operand, else this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this is not set this node
// can be rematerialized which leads to errors.
-instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
+instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{
match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
+ effect(TEMP cr0);
format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
@@ -7560,16 +7533,16 @@
// Mem_ptr must be a memory operand, else this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this is not set this node
// can be rematerialized which leads to errors.
-instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
- match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
- format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_compound);
- __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
- MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
- noreg, NULL, true);
- %}
- ins_pipe(pipe_class_default);
+instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
+ match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
+ ins_cost(2*MEMORY_REF_COST);
+
+ format %{ "STDCX_ if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_stdcx_);
+ __ stdcx_($newVal$$Register, $mem_ptr$$Register);
+ %}
+ ins_pipe(pipe_class_memory);
%}
// Implement LoadPLocked. Must be ordered against changes of the memory location
@@ -7577,13 +7550,14 @@
// Don't know whether this is ever used.
instruct loadPLocked(iRegPdst dst, memory mem) %{
match(Set dst (LoadPLocked mem));
- ins_cost(MEMORY_REF_COST);
-
- format %{ "LD $dst, $mem \t// loadPLocked\n\t"
- "TWI $dst\n\t"
- "ISYNC" %}
- size(12);
- ins_encode( enc_ld_ac(dst, mem) );
+ ins_cost(2*MEMORY_REF_COST);
+
+ format %{ "LDARX $dst, $mem \t// loadPLocked\n\t" %}
+ size(4);
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_ldarx);
+ __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
+ %}
ins_pipe(pipe_class_memory);
%}
@@ -7593,8 +7567,9 @@
// (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))" cannot be
// matched.
-instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
+instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
+ effect(TEMP cr0);
format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode %{
@@ -7607,8 +7582,9 @@
ins_pipe(pipe_class_default);
%}
-instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
+instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+ effect(TEMP cr0);
format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode %{
@@ -7621,8 +7597,9 @@
ins_pipe(pipe_class_default);
%}
-instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
+instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
+ effect(TEMP cr0);
format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode %{
@@ -7635,8 +7612,9 @@
ins_pipe(pipe_class_default);
%}
-instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
+instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
+ effect(TEMP cr0);
format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode %{
@@ -7649,48 +7627,54 @@
ins_pipe(pipe_class_default);
%}
-instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndAddI mem_ptr src));
+ effect(TEMP cr0);
format %{ "GetAndAddI $res, $mem_ptr, $src" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
ins_pipe(pipe_class_default);
%}
-instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndAddL mem_ptr src));
+ effect(TEMP cr0);
format %{ "GetAndAddL $res, $mem_ptr, $src" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
ins_pipe(pipe_class_default);
%}
-instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndSetI mem_ptr src));
+ effect(TEMP cr0);
format %{ "GetAndSetI $res, $mem_ptr, $src" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
ins_pipe(pipe_class_default);
%}
-instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndSetL mem_ptr src));
+ effect(TEMP cr0);
format %{ "GetAndSetL $res, $mem_ptr, $src" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
ins_pipe(pipe_class_default);
%}
-instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
+instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndSetP mem_ptr src));
+ effect(TEMP cr0);
format %{ "GetAndSetP $res, $mem_ptr, $src" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
ins_pipe(pipe_class_default);
%}
-instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
+instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndSetN mem_ptr src));
+ effect(TEMP cr0);
format %{ "GetAndSetN $res, $mem_ptr, $src" %}
// Variable size: instruction count smaller if regs are disjoint.
ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
@@ -7898,18 +7882,8 @@
%}
// Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
-// so this rule seems to be unused.
-instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
- match(Set dst (SubI src1 src2));
- format %{ "SUBI $dst, $src1, $src2" %}
- size(4);
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_addi);
- __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
- %}
- ins_pipe(pipe_class_default);
-%}
+// Immediate Subtraction: The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal and SubLNode::Ideal).
+// Don't try to use addi with - $src2$$constant since it can overflow when $src2$$constant == minI16.
// SubI from constant (using subfic).
instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
@@ -7989,22 +7963,6 @@
ins_pipe(pipe_class_default);
%}
-// Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
-// so this rule seems to be unused.
-// No constant pool entries required.
-instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
- match(Set dst (SubL src1 src2));
-
- format %{ "SUBI $dst, $src1, $src2 \t// long" %}
- size(4);
- ins_encode %{
- // TODO: PPC port $archOpcode(ppc64Opcode_addi);
- __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
- %}
- ins_pipe(pipe_class_default);
-%}
-
// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
// positive longs and 0xF...F for negative ones.
instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
@@ -8165,7 +8123,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{
+instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
effect(USE_DEF dst, USE src1, USE crx);
predicate(false);
@@ -8228,7 +8186,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{
+instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
effect(USE_DEF dst, USE src1, USE crx);
predicate(false);
@@ -8281,7 +8239,7 @@
%}
// Long Remainder with registers
-instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
match(Set dst (ModL src1 src2));
ins_cost(10*DEFAULT_COST);
@@ -9011,7 +8969,6 @@
instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
match(Set dst (AndL src1 src2));
effect(KILL cr0);
- ins_cost(DEFAULT_COST);
format %{ "ANDI $dst, $src1, $src2 \t// long" %}
size(4);
@@ -9803,7 +9760,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
// no match-rule, false predicate
effect(DEF dst, USE crx, USE src);
predicate(false);
@@ -9817,7 +9774,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
// no match-rule, false predicate
effect(DEF dst, USE crx, USE mem);
predicate(false);
@@ -9972,7 +9929,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
// no match-rule, false predicate
effect(DEF dst, USE crx, USE src);
predicate(false);
@@ -9986,7 +9943,7 @@
ins_pipe(pipe_class_default);
%}
-instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
// no match-rule, false predicate
effect(DEF dst, USE crx, USE mem);
predicate(false);
@@ -10255,7 +10212,6 @@
size(4);
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_andi_);
- // FIXME: avoid andi_ ?
__ andi_(R0, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_compare);
@@ -10302,13 +10258,12 @@
size(4);
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_andi_);
- // FIXME: avoid andi_ ?
__ andi_(R0, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_compare);
%}
-instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsRegSrc crx) %{
// no match-rule, false predicate
effect(DEF dst, USE crx);
predicate(false);
@@ -10332,7 +10287,7 @@
ins_pipe(pipe_class_compare);
%}
-instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsRegSrc crx) %{
// no match-rule, false predicate
effect(DEF dst, USE crx);
predicate(false);
@@ -10622,8 +10577,9 @@
//----------Float Compares----------------------------------------------------
instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
+ // Needs matchrule, see cmpDUnordered.
+ match(Set crx (CmpF src1 src2));
// no match-rule, false predicate
- effect(DEF crx, USE src1, USE src2);
predicate(false);
format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10731,8 +10687,14 @@
%}
instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
- // no match-rule, false predicate
- effect(DEF crx, USE src1, USE src2);
+ // Needs matchrule so that ideal opcode is Cmp. This causes gcm to place the
+ // node right before the conditional move using it.
+ // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
+ // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
+ // crashed in register allocation where the flags Reg between cmpDUnordered and a
+ // conditional move was supposed to be spilled.
+ match(Set crx (CmpD src1 src2));
+ // False predicate, shall not be matched.
predicate(false);
format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10830,7 +10792,7 @@
%}
// Conditional Near Branch
-instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
// Same match rule as `branchConFar'.
match(If cmp crx);
effect(USE lbl);
@@ -10853,7 +10815,7 @@
// expensive.
//
// Conditional Far Branch
-instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
// Same match rule as `branchCon'.
match(If cmp crx);
effect(USE crx, USE lbl);
@@ -10871,7 +10833,7 @@
%}
// Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConSched(cmpOp cmp, flagsRegSrc crx, label lbl) %{
// Same match rule as `branchCon'.
match(If cmp crx);
effect(USE crx, USE lbl);
@@ -10890,7 +10852,7 @@
ins_pipe(pipe_class_default);
%}
-instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
match(CountedLoopEnd cmp crx);
effect(USE labl);
ins_cost(BRANCH_COST);
@@ -10904,7 +10866,7 @@
ins_pipe(pipe_class_default);
%}
-instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
match(CountedLoopEnd cmp crx);
effect(USE labl);
predicate(!false /* TODO: PPC port HB_Schedule */);
@@ -10920,7 +10882,7 @@
%}
// Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndSched(cmpOp cmp, flagsRegSrc crx, label labl) %{
match(CountedLoopEnd cmp crx);
effect(USE labl);
predicate(false /* TODO: PPC port HB_Schedule */);
@@ -10969,13 +10931,36 @@
instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
match(Set crx (FastLock oop box));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
- // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking);
+ predicate(/*(!UseNewFastLockPPC64 || UseBiasedLocking) &&*/ !Compile::current()->use_rtm());
format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3" %}
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
__ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
- $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+ $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+ UseBiasedLocking && !UseOptoBiasInlining); // SAPJVM MD 2014-11-06 UseOptoBiasInlining
+ // If locking was successful, crx should indicate 'EQ'.
+ // The compiler generates a branch to the runtime call to
+ // _complete_monitor_locking_Java for the case where crx is 'NE'.
+ %}
+ ins_pipe(pipe_class_compare);
+%}
+
+// Separate version for TM. Use bound register for box to enable USE_KILL.
+instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+ match(Set crx (FastLock oop box));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
+ predicate(Compile::current()->use_rtm());
+
+ format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+ $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+ /*Biased Locking*/ false,
+ _rtm_counters, _stack_rtm_counters,
+ ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
+ /*TM*/ true, ra_->C->profile_rtm());
// If locking was successfull, crx should indicate 'EQ'.
// The compiler generates a branch to the runtime call to
// _complete_monitor_locking_Java for the case where crx is 'NE'.
@@ -10986,12 +10971,33 @@
instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
match(Set crx (FastUnlock oop box));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ predicate(!Compile::current()->use_rtm());
format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %}
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
__ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
- $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+ $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+ UseBiasedLocking && !UseOptoBiasInlining,
+ false);
+ // If unlocking was successful, crx should indicate 'EQ'.
+ // The compiler generates a branch to the runtime call to
+ // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
+ %}
+ ins_pipe(pipe_class_compare);
+%}
+
+instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+ match(Set crx (FastUnlock oop box));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ predicate(Compile::current()->use_rtm());
+
+ format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2 (TM)" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+ $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+ /*Biased Locking*/ false, /*TM*/ true);
// If unlocking was successfull, crx should indicate 'EQ'.
// The compiler generates a branch to the runtime call to
// _complete_monitor_unlocking_Java for the case where crx is 'NE'.
@@ -11658,6 +11664,66 @@
ins_pipe(pipe_class_default);
%}
+
+//----------Overflow Math Instructions-----------------------------------------
+
+// Note that we have to make sure that XER.SO is reset before using overflow instructions.
+// Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
+// Seems like only Long intrinsics have an advantage. (The only expensive one is OverflowMulL.)
+
+instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowAddL op1 op2));
+  // Overflow check for long addition: only cr0 is produced; the sum is written to R0, which is not an operand.
+  format %{ "addo_   R0, $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ addo_(R0, $op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+ match(Set cr0 (OverflowSubL op1 op2));
+ // Overflow check for long subtraction: only cr0 is produced; the difference is written to R0, which is not an operand.
+ format %{ "subfo_ R0, $op2, $op1\t# overflow check long" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ li(R0, 0);
+ __ mtxer(R0); // clear XER.SO
+ __ subfo_(R0, $op2$$Register, $op1$$Register); // "subtract from" form: op2 is passed first to compute op1 - op2
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
+ match(Set cr0 (OverflowSubL zero op2));
+ // Overflow check for long negation (OverflowSubL whose first input is the immL_0 operand): only cr0 is produced.
+ format %{ "nego_ R0, $op2\t# overflow check long" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ li(R0, 0);
+ __ mtxer(R0); // clear XER.SO
+ __ nego_(R0, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+ match(Set cr0 (OverflowMulL op1 op2));
+ // Overflow check for long multiplication: only cr0 is produced; the product is written to R0, which is not an operand.
+ format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ __ li(R0, 0);
+ __ mtxer(R0); // clear XER.SO
+ __ mulldo_(R0, $op1$$Register, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+
// ============================================================================
// Safepoint Instruction
--- a/hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,19 +23,10 @@
*
*/
-// make sure the defines don't screw up the declarations later on in this file
+// Make sure the defines don't screw up the declarations later on in this file.
#define DONT_USE_REGISTER_DEFINES
-#include "precompiled.hpp"
-#include "asm/macroAssembler.hpp"
#include "asm/register.hpp"
-#include "register_ppc.hpp"
-#ifdef TARGET_ARCH_MODEL_ppc_32
-# include "interp_masm_ppc_32.hpp"
-#endif
-#ifdef TARGET_ARCH_MODEL_ppc_64
-# include "interp_masm_ppc_64.hpp"
-#endif
REGISTER_DEFINITION(Register, noreg);
--- a/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,14 +25,12 @@
#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
-#include "assembler_ppc.inline.hpp"
#include "code/relocInfo.hpp"
#include "nativeInst_ppc.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/safepoint.hpp"
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
- bool copy_back_to_oop_pool = true; // TODO: PPC port
// The following comment is from the declaration of DataRelocation:
//
// "The "o" (displacement) argument is relevant only to split relocations
--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
+#include "frame_ppc.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "oops/compiledICHolder.hpp"
@@ -194,8 +195,8 @@
RegisterSaver_LiveIntReg( R27 ),
RegisterSaver_LiveIntReg( R28 ),
RegisterSaver_LiveIntReg( R29 ),
- RegisterSaver_LiveIntReg( R31 ),
- RegisterSaver_LiveIntReg( R30 ), // r30 must be the last register
+ RegisterSaver_LiveIntReg( R30 ),
+ RegisterSaver_LiveIntReg( R31 ), // must be the last register (see save/restore functions below)
};
OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
@@ -229,29 +230,30 @@
BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
- // Save r30 in the last slot of the not yet pushed frame so that we
+ // Save r31 in the last slot of the not yet pushed frame so that we
// can use it as scratch reg.
- __ std(R30, -reg_size, R1_SP);
+ __ std(R31, -reg_size, R1_SP);
assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
"consistency check");
// save the flags
// Do the save_LR_CR by hand and adjust the return pc if requested.
- __ mfcr(R30);
- __ std(R30, _abi(cr), R1_SP);
+ __ mfcr(R31);
+ __ std(R31, _abi(cr), R1_SP);
switch (return_pc_location) {
- case return_pc_is_lr: __ mflr(R30); break;
- case return_pc_is_r4: __ mr(R30, R4); break;
+ case return_pc_is_lr: __ mflr(R31); break;
+ case return_pc_is_r4: __ mr(R31, R4); break;
case return_pc_is_thread_saved_exception_pc:
- __ ld(R30, thread_(saved_exception_pc)); break;
+ __ ld(R31, thread_(saved_exception_pc)); break;
default: ShouldNotReachHere();
}
- if (return_pc_adjustment != 0)
- __ addi(R30, R30, return_pc_adjustment);
- __ std(R30, _abi(lr), R1_SP);
+ if (return_pc_adjustment != 0) {
+ __ addi(R31, R31, return_pc_adjustment);
+ }
+ __ std(R31, _abi(lr), R1_SP);
// push a new frame
- __ push_frame(frame_size_in_bytes, R30);
+ __ push_frame(frame_size_in_bytes, R31);
// save all registers (ints and floats)
offset = register_save_offset;
@@ -261,7 +263,7 @@
switch (reg_type) {
case RegisterSaver::int_reg: {
- if (reg_num != 30) { // We spilled R30 right at the beginning.
+ if (reg_num != 31) { // We spilled R31 right at the beginning.
__ std(as_Register(reg_num), offset, R1_SP);
}
break;
@@ -272,8 +274,8 @@
}
case RegisterSaver::special_reg: {
if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
- __ mfctr(R30);
- __ std(R30, offset, R1_SP);
+ __ mfctr(R31);
+ __ std(R31, offset, R1_SP);
} else {
Unimplemented();
}
@@ -321,7 +323,7 @@
switch (reg_type) {
case RegisterSaver::int_reg: {
- if (reg_num != 30) // R30 restored at the end, it's the tmp reg!
+ if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
__ ld(as_Register(reg_num), offset, R1_SP);
break;
}
@@ -332,8 +334,8 @@
case RegisterSaver::special_reg: {
if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
- __ ld(R30, offset, R1_SP);
- __ mtctr(R30);
+ __ ld(R31, offset, R1_SP);
+ __ mtctr(R31);
}
} else {
Unimplemented();
@@ -350,10 +352,10 @@
__ pop_frame();
// restore the flags
- __ restore_LR_CR(R30);
+ __ restore_LR_CR(R31);
// restore scratch register's value
- __ ld(R30, -reg_size, R1_SP);
+ __ ld(R31, -reg_size, R1_SP);
BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
}
@@ -2021,6 +2023,8 @@
__ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame.
frame_done_pc = (intptr_t)__ pc();
+ __ verify_thread();
+
// Native nmethod wrappers never take possesion of the oop arguments.
// So the caller will gc the arguments.
// The only thing we need an oopMap for is if the call is static.
@@ -2594,7 +2598,7 @@
}
uint SharedRuntime::out_preserve_stack_slots() {
-#ifdef COMPILER2
+#if defined(COMPILER1) || defined(COMPILER2)
return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
#else
return 0;
@@ -2868,11 +2872,6 @@
__ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
__ BIND(skip_restore_excp);
- // reload narrro_oop_base
- if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
- __ load_const_optimized(R30, Universe::narrow_oop_base());
- }
-
__ pop_frame();
// stack: (deoptee, optional i2c, caller of deoptee, ...).
--- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -261,9 +261,6 @@
// global toc register
__ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
- // Load narrow oop base.
- __ reinit_heapbase(R30, R11_scratch1);
-
// Remember the senderSP so we interpreter can pop c2i arguments off of the stack
// when called via a c2i.
@@ -418,6 +415,23 @@
// or native call stub. The pending exception in Thread is
// converted into a Java-level exception.
//
+ // Read:
+ //
+ // LR: The pc the runtime library callee wants to return to.
+ // Since the exception occurred in the callee, the return pc
+ // from the point of view of Java is the exception pc.
+ // thread: Needed for method handles.
+ //
+ // Invalidate:
+ //
+ // volatile registers (except below).
+ //
+ // Update:
+ //
+ // R4_ARG2: exception
+ //
+ // (LR is unchanged and is live out).
+ //
address generate_forward_exception() {
StubCodeMark mark(this, "StubRoutines", "forward_exception");
address start = __ pc();
@@ -1256,9 +1270,9 @@
Register tmp3 = R8_ARG6;
#if defined(ABI_ELFv2)
- address nooverlap_target = aligned ?
- StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
- StubRoutines::jbyte_disjoint_arraycopy();
+ address nooverlap_target = aligned ?
+ StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
+ StubRoutines::jbyte_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -264,11 +264,11 @@
__ cmpdi(CCR0, Rmdo, 0);
__ beq(CCR0, no_mdo);
- // Increment invocation counter in the MDO.
- const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
- __ lwz(Rscratch2, mdo_ic_offs, Rmdo);
+ // Increment backedge counter in the MDO.
+ const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+ __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
__ addi(Rscratch2, Rscratch2, increment);
- __ stw(Rscratch2, mdo_ic_offs, Rmdo);
+ __ stw(Rscratch2, mdo_bc_offs, Rmdo);
__ load_const_optimized(Rscratch1, mask, R0);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ bne(CCR0, done);
@@ -276,12 +276,12 @@
}
// Increment counter in MethodCounters*.
- const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+ const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ bind(no_mdo);
__ get_method_counters(R19_method, R3_counters, done);
- __ lwz(Rscratch2, mo_ic_offs, R3_counters);
+ __ lwz(Rscratch2, mo_bc_offs, R3_counters);
__ addi(Rscratch2, Rscratch2, increment);
- __ stw(Rscratch2, mo_ic_offs, R3_counters);
+ __ stw(Rscratch2, mo_bc_offs, R3_counters);
__ load_const_optimized(Rscratch1, mask, R0);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ beq(CCR0, *overflow);
@@ -611,12 +611,7 @@
// For others we can use a normal (native) entry.
inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
- // Provide math entry with debugging on demand.
- // Note: Debugging changes which code will get executed:
- // Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call.
- // Not debugging and enabled InlineIntrinics: processor instruction will get used.
- // Result might differ slightly due to rounding etc.
- if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry.
+ if (!InlineIntrinsics) return false;
return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
(kind==Interpreter::java_lang_math_abs));
@@ -628,15 +623,8 @@
return Interpreter::entry_for_kind(Interpreter::zerolocals);
}
- Label Lslow_path;
- const Register Rjvmti_mode = R11_scratch1;
address entry = __ pc();
- // Provide math entry with debugging on demand.
- __ lwz(Rjvmti_mode, thread_(interp_only_mode));
- __ cmpwi(CCR0, Rjvmti_mode, 0);
- __ bne(CCR0, Lslow_path); // jvmti_mode!=0
-
__ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
// Pop c2i arguments (if any) off when we return.
@@ -659,9 +647,6 @@
// And we're done.
__ blr();
- // Provide slow path for JVMTI case.
- __ bind(Lslow_path);
- __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);
__ flush();
return entry;
--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
// Run with +PrintInterpreter to get the VM to print out the size.
// Max size with JVMTI
- const static int InterpreterCodeSize = 210*K;
+ const static int InterpreterCodeSize = 230*K;
#endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
--- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -375,23 +375,22 @@
int index_size = wide ? sizeof(u2) : sizeof(u1);
const Register Rscratch = R11_scratch1;
- Label resolved;
+ Label is_null;
// We are resolved if the resolved reference cache entry contains a
// non-null object (CallSite, etc.)
__ get_cache_index_at_bcp(Rscratch, 1, index_size); // Load index.
- __ load_resolved_reference_at_index(R17_tos, Rscratch);
- __ cmpdi(CCR0, R17_tos, 0);
- __ bne(CCR0, resolved);
+ __ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null);
+ __ verify_oop(R17_tos);
+ __ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));
+
+ __ bind(is_null);
__ load_const_optimized(R3_ARG1, (int)bytecode());
address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
// First time invocation - must resolve first.
__ call_VM(R17_tos, entry, R3_ARG1);
-
- __ align(32, 12);
- __ bind(resolved);
__ verify_oop(R17_tos);
}
@@ -3795,9 +3794,9 @@
transition(atos, itos);
Label Ldone, Lis_null, Lquicked, Lresolved;
- Register Roffset = R5_ARG3,
+ Register Roffset = R6_ARG4,
RobjKlass = R4_ARG2,
- RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register.
+ RspecifiedKlass = R5_ARG3,
Rcpool = R11_scratch1,
Rtags = R12_scratch2;
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -32,12 +32,13 @@
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "utilities/defaultStream.hpp"
+#include "utilities/globalDefinitions.hpp"
#include "vm_version_ppc.hpp"
# include <sys/sysinfo.h>
int VM_Version::_features = VM_Version::unknown_m;
-int VM_Version::_measured_cache_line_size = 128; // default value
+int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
const char* VM_Version::_features_str = "";
bool VM_Version::_is_determine_features_test_running = false;
@@ -55,7 +56,9 @@
// If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
- if (VM_Version::has_popcntw()) {
+ if (VM_Version::has_lqarx()) {
+ FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
+ } else if (VM_Version::has_popcntw()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
} else if (VM_Version::has_cmpb()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
@@ -66,8 +69,14 @@
}
}
guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
- PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
- "PowerArchitecturePPC64 should be 0, 5, 6 or 7");
+ PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
+ PowerArchitecturePPC64 == 8,
+ "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
+
+ // Power 8: Configure Data Stream Control Register.
+ if (PowerArchitecturePPC64 >= 8) {
+ config_dscr();
+ }
if (!UseSIGTRAP) {
MSG(TrapBasedICMissChecks);
@@ -97,7 +106,7 @@
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
- "ppc64%s%s%s%s%s%s%s%s",
+ "ppc64%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@@ -106,11 +115,17 @@
(has_popcntb() ? " popcntb" : ""),
(has_popcntw() ? " popcntw" : ""),
(has_fcfids() ? " fcfids" : ""),
- (has_vand() ? " vand" : "")
+ (has_vand() ? " vand" : ""),
+ (has_lqarx() ? " lqarx" : ""),
+ (has_vcipher() ? " vcipher" : ""),
+ (has_vpmsumb() ? " vpmsumb" : ""),
+ (has_tcheck() ? " tcheck" : "")
// Make sure number of %s matches num_features!
);
_features_str = os::strdup(buf);
- NOT_PRODUCT(if (Verbose) print_features(););
+ if (Verbose) {
+ print_features();
+ }
// PPC64 supports 8-byte compare-exchange operations (see
// Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
@@ -171,6 +186,58 @@
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ // Adjust RTM (Restricted Transactional Memory) flags.
+ if (!has_tcheck() && UseRTMLocking) {
+ // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+ // setting during arguments processing. See use_biased_locking().
+ // VM_Version_init() is executed after UseBiasedLocking is used
+ // in Thread::allocate().
+ vm_exit_during_initialization("RTM instructions are not available on this CPU");
+ }
+
+ if (UseRTMLocking) {
+#if INCLUDE_RTM_OPT
+ if (!UnlockExperimentalVMOptions) {
+ vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. "
+ "It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
+ } else {
+ warning("UseRTMLocking is only available as experimental option on this platform.");
+ }
+ if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
+ // RTM locking should be used only for applications with
+ // high lock contention. For now we do not use it by default.
+ vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
+ }
+ if (!is_power_of_2(RTMTotalCountIncrRate)) {
+ warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
+ FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
+ }
+ if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
+ warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
+ FLAG_SET_DEFAULT(RTMAbortRatio, 50);
+ }
+ FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM.
+ guarantee(RTMSpinLoopCount > 0, "unsupported");
+#else
+ // Only C2 does RTM locking optimization.
+ // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+ // setting during arguments processing. See use_biased_locking().
+ vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
+#endif
+ } else { // !UseRTMLocking
+ if (UseRTMForStackLocks) {
+ if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
+ warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
+ }
+ FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
+ }
+ if (UseRTMDeopt) {
+ FLAG_SET_DEFAULT(UseRTMDeopt, false);
+ }
+ if (PrintPreciseRTMLockingStatistics) {
+ FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
+ }
+ }
// This machine does not allow unaligned memory accesses
if (UseUnalignedAccesses) {
@@ -180,6 +247,27 @@
}
}
+bool VM_Version::use_biased_locking() {
+#if INCLUDE_RTM_OPT
+  // RTM locking pays off under high lock contention combined with low data
+  // contention. Heavily contended locks are usually inflated, a case biased
+  // locking is not suited for, and the RTM locking code requires biased
+  // locking to be off.
+  // Note: UseBiasedLocking can't be switched off in get_processor_features()
+  // because Thread::allocate() uses it and is called before
+  // VM_Version::initialize().
+  if (UseBiasedLocking && UseRTMLocking) {
+    if (!FLAG_IS_DEFAULT(UseBiasedLocking)) {
+      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
+      UseBiasedLocking = false;
+    } else {
+      FLAG_SET_DEFAULT(UseBiasedLocking, false);
+    }
+  }
+#endif
+  return UseBiasedLocking;
+}
+
void VM_Version::print_features() {
tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
}
@@ -443,16 +531,19 @@
// Don't use R0 in ldarx.
// Keep R3_ARG1 unmodified, it contains &field (see below).
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
- a->fsqrt(F3, F4); // code[0] -> fsqrt_m
- a->fsqrts(F3, F4); // code[1] -> fsqrts_m
- a->isel(R7, R5, R6, 0); // code[2] -> isel_m
- a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
- a->cmpb(R7, R5, R6); // code[4] -> bcmp
- //a->mftgpr(R7, F3); // code[5] -> mftgpr
- a->popcntb(R7, R5); // code[6] -> popcntb
- a->popcntw(R7, R5); // code[7] -> popcntw
- a->fcfids(F3, F4); // code[8] -> fcfids
- a->vand(VR0, VR0, VR0); // code[9] -> vand
+ a->fsqrt(F3, F4); // code[0] -> fsqrt_m
+ a->fsqrts(F3, F4); // code[1] -> fsqrts_m
+ a->isel(R7, R5, R6, 0); // code[2] -> isel_m
+ a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
+ a->cmpb(R7, R5, R6); // code[4] -> cmpb
+ a->popcntb(R7, R5); // code[5] -> popcntb
+ a->popcntw(R7, R5); // code[6] -> popcntw
+ a->fcfids(F3, F4); // code[7] -> fcfids
+ a->vand(VR0, VR0, VR0); // code[8] -> vand
+ a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m
+ a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher
+ a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb
+ a->tcheck(0); // code[12] -> tcheck
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -491,11 +582,14 @@
if (code[feature_cntr++]) features |= isel_m;
if (code[feature_cntr++]) features |= lxarxeh_m;
if (code[feature_cntr++]) features |= cmpb_m;
- //if(code[feature_cntr++])features |= mftgpr_m;
if (code[feature_cntr++]) features |= popcntb_m;
if (code[feature_cntr++]) features |= popcntw_m;
if (code[feature_cntr++]) features |= fcfids_m;
if (code[feature_cntr++]) features |= vand_m;
+ if (code[feature_cntr++]) features |= lqarx_m;
+ if (code[feature_cntr++]) features |= vcipher_m;
+ if (code[feature_cntr++]) features |= vpmsumb_m;
+ if (code[feature_cntr++]) features |= tcheck_m;
// Print the detection code.
if (PrintAssembly) {
@@ -507,6 +601,69 @@
_features = features;
}
+// Power 8: Configure Data Stream Control Register.
+//
+// Emits two small stubs, one reading the DSCR (mfdscr) and one writing it
+// (mtdscr), reads the current register value and then applies the settings
+// requested through the DSCR_PPC64, DSCR_DPFD_PPC64 and DSCR_URG_PPC64 flags:
+//  - DSCR_PPC64 != (uintx)-1 replaces the whole register value.
+//  - DSCR_DPFD_PPC64 <= 7 replaces the 3-bit field in bits 0..2.
+//  - DSCR_URG_PPC64  <= 7 replaces the 3-bit field in bits 6..8.
+// Flag values outside these ranges leave the corresponding part untouched.
+// The register is written only if at least one change was requested.
+void VM_Version::config_dscr() {
+  assert(has_tcheck(), "Only execute on Power 8 or later!");
+
+  // 7 InstWords for each call (function descriptor + blr instruction).
+  const int code_size = (2+2*7)*BytesPerInstWord;
+
+  // Allocate space for the code.
+  ResourceMark rm;
+  CodeBuffer cb("config_dscr", code_size, 0);
+  MacroAssembler* a = new MacroAssembler(&cb);
+
+  // Emit code.
+  // Getter stub: moves the DSCR into R3 and returns.
+  uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd();
+  uint32_t *code = (uint32_t *)a->pc();
+  a->mfdscr(R3);
+  a->blr();
+
+  // Setter stub: moves R3 into the DSCR and returns.
+  void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd();
+  a->mtdscr(R3);
+  a->blr();
+
+  uint32_t *code_end = (uint32_t *)a->pc();
+  a->flush();
+
+  // Print the dscr configuration stubs.
+  if (PrintAssembly) {
+    ttyLocker ttyl;
+    tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code);
+    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
+  }
+
+  // Apply the configuration if needed.
+  uint64_t dscr_val = (*get_dscr)();
+  if (Verbose) {
+    tty->print_cr("dscr value was 0x%lx" , dscr_val);
+  }
+  bool change_requested = false;
+  // A complete register value was requested; the field flags below are
+  // still applied on top of it.
+  if (DSCR_PPC64 != (uintx)-1) {
+    dscr_val = DSCR_PPC64;
+    change_requested = true;
+  }
+  // DPFD field: lowest three bits.
+  if (DSCR_DPFD_PPC64 <= 7) {
+    uint64_t mask = 0x7;
+    if ((dscr_val & mask) != DSCR_DPFD_PPC64) {
+      dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64);
+      change_requested = true;
+    }
+  }
+  // URG field: three bits starting at bit 6. Compare against the URG flag
+  // (not the DPFD flag) so a change is requested exactly when the current
+  // URG field differs from the requested one.
+  if (DSCR_URG_PPC64 <= 7) {
+    uint64_t mask = 0x7 << 6;
+    if ((dscr_val & mask) != (DSCR_URG_PPC64 << 6)) {
+      dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
+      change_requested = true;
+    }
+  }
+  if (change_requested) {
+    (*set_dscr)(dscr_val);
+    if (Verbose) {
+      tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)());
+    }
+  }
+}
static int saved_features = 0;
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,10 @@
popcntw,
fcfids,
vand,
- dcba,
+ lqarx,
+ vcipher,
+ vpmsumb,
+ tcheck,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@@ -55,7 +58,10 @@
popcntw_m = (1 << popcntw),
fcfids_m = (1 << fcfids ),
vand_m = (1 << vand ),
- dcba_m = (1 << dcba ),
+ lqarx_m = (1 << lqarx ),
+ vcipher_m = (1 << vcipher),
+ vpmsumb_m = (1 << vpmsumb),
+ tcheck_m = (1 << tcheck ),
all_features_m = -1
};
static int _features;
@@ -65,12 +71,16 @@
static void print_features();
static void determine_features(); // also measures cache line size
+ static void config_dscr(); // Power 8: Configure Data Stream Control Register.
static void determine_section_size();
static void power6_micro_bench();
public:
// Initialization
static void initialize();
+ // Override Abstract_VM_Version implementation
+ static bool use_biased_locking();
+
static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
// CPU instruction support
static bool has_fsqrt() { return (_features & fsqrt_m) != 0; }
@@ -82,7 +92,10 @@
static bool has_popcntw() { return (_features & popcntw_m) != 0; }
static bool has_fcfids() { return (_features & fcfids_m) != 0; }
static bool has_vand() { return (_features & vand_m) != 0; }
- static bool has_dcba() { return (_features & dcba_m) != 0; }
+ static bool has_lqarx() { return (_features & lqarx_m) != 0; }
+ static bool has_vcipher() { return (_features & vcipher_m) != 0; }
+ static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
+ static bool has_tcheck() { return (_features & tcheck_m) != 0; }
static const char* cpu_features() { return _features_str; }
--- a/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp Wed Apr 15 12:44:56 2015 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,6 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/vtableStubs.hpp"
#include "interp_masm_ppc_64.hpp"