# HG changeset patch # User thartmann # Date 1455708293 -3600 # Node ID 692adc3fa1b535e3248917bf792975f9550c97a6 # Parent 954c49c0ba57e2e9dd02ef83756d644e5f4455f8 6378256: Performance problem with System.identityHashCode in client compiler Summary: Enabled C1 optimization to try pull out hashCode from object header, before calling into the VM. Reviewed-by: dlong, roland, thartmann Contributed-by: Rahul Raghavan diff -r 954c49c0ba57 -r 692adc3fa1b5 hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Tue Feb 16 09:49:58 2016 -0800 +++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Wed Feb 17 12:24:53 2016 +0100 @@ -2015,23 +2015,33 @@ int vep_offset = ((intptr_t)__ pc()) - start; #ifdef COMPILER1 - if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { - // Object.hashCode can pull the hashCode from the header word - // instead of doing a full VM transition once it's been computed. - // Since hashCode is usually polymorphic at call sites we can't do - // this optimization at the call site without a lot of work. + if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { + // Object.hashCode, System.identityHashCode can pull the hashCode from the + // header word instead of doing a full VM transition once it's been computed. + // Since hashCode is usually polymorphic at call sites we can't do this + // optimization at the call site without a lot of work. Label slowCase; - Register receiver = O0; + Label done; + Register obj_reg = O0; Register result = O0; Register header = G3_scratch; Register hash = G3_scratch; // overwrite header value with hash value Register mask = G1; // to get hash field from header + // Unlike Object.hashCode, System.identityHashCode is a static method and + // gets the object as an argument instead of as the receiver.
+ if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) { + assert(method->is_static(), "method should be static"); + // return 0 for null reference input + __ br_null(obj_reg, false, Assembler::pn, done); + __ delayed()->mov(obj_reg, hash); + } + // Read the header and build a mask to get its hash field. Give up if the object is not unlocked. // We depend on hash_mask being at most 32 bits and avoid the use of // hash_mask_in_place because it could be larger than 32 bits in a 64-bit // vm: see markOop.hpp. - __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header); + __ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header); __ sethi(markOopDesc::hash_mask, mask); __ btst(markOopDesc::unlocked_value, header); __ br(Assembler::zero, false, Assembler::pn, slowCase); @@ -2054,6 +2064,7 @@ __ delayed()->nop(); // leaf return. + __ bind(done); __ retl(); __ delayed()->mov(hash, result); __ bind(slowCase); diff -r 954c49c0ba57 -r 692adc3fa1b5 hotspot/src/cpu/x86/vm/sharedRuntime_x86.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86.cpp Wed Feb 17 12:24:53 2016 +0100 @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_x86.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif //COMPILER1 + +#define __ masm-> + +#ifdef COMPILER1 +// --------------------------------------------------------------------------- +// Object.hashCode, System.identityHashCode can pull the hashCode from the +// header word instead of doing a full VM transition once it's been computed. +// Since hashCode is usually polymorphic at call sites we can't do this +// optimization at the call site without a lot of work. +void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* masm, + methodHandle method, + Register obj_reg, + Register result) { + Label slowCase; + + // Unlike Object.hashCode, System.identityHashCode is a static method and + // gets the object as an argument instead of as the receiver.
+ if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) { + Label Continue; + // return 0 for null reference input + __ cmpptr(obj_reg, (int32_t)NULL_WORD); + __ jcc(Assembler::notEqual, Continue); + __ xorptr(result, result); + __ ret(0); + __ bind(Continue); + } + + __ movptr(result, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + + // check if locked + __ testptr(result, markOopDesc::unlocked_value); + __ jcc(Assembler::zero, slowCase); + + if (UseBiasedLocking) { + // Check if biased and fall through to runtime if so + __ testptr(result, markOopDesc::biased_lock_bit_in_place); + __ jcc(Assembler::notZero, slowCase); + } + + // get hash +#ifdef _LP64 + // Read the header and build a mask to get its hash field. + // Depend on hash_mask being at most 32 bits and avoid the use of hash_mask_in_place + // because it could be larger than 32 bits in a 64-bit vm. See markOop.hpp. + __ shrptr(result, markOopDesc::hash_shift); + __ andptr(result, markOopDesc::hash_mask); +#else + __ andptr(result, markOopDesc::hash_mask_in_place); +#endif //_LP64 + + // test if hashCode exists + __ jcc(Assembler::zero, slowCase); +#ifndef _LP64 + __ shrptr(result, markOopDesc::hash_shift); +#endif + __ ret(0); + __ bind(slowCase); +} +#endif //COMPILER1 + diff -r 954c49c0ba57 -r 692adc3fa1b5 hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Feb 16 09:49:58 2016 -0800 +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Wed Feb 17 12:24:53 2016 +0100 @@ -1754,34 +1754,10 @@ int vep_offset = ((intptr_t)__ pc()) - start; #ifdef COMPILER1 - if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { - // Object.hashCode can pull the hashCode from the header word - // instead of doing a full VM transition once it's been computed. - // Since hashCode is usually polymorphic at call sites we can't do - // this optimization at the call site without a lot of work. 
- Label slowCase; - Register receiver = rcx; - Register result = rax; - __ movptr(result, Address(receiver, oopDesc::mark_offset_in_bytes())); - - // check if locked - __ testptr(result, markOopDesc::unlocked_value); - __ jcc (Assembler::zero, slowCase); - - if (UseBiasedLocking) { - // Check if biased and fall through to runtime if so - __ testptr(result, markOopDesc::biased_lock_bit_in_place); - __ jcc (Assembler::notZero, slowCase); - } - - // get hash - __ andptr(result, markOopDesc::hash_mask_in_place); - // test if hashCode exists - __ jcc (Assembler::zero, slowCase); - __ shrptr(result, markOopDesc::hash_shift); - __ ret(0); - __ bind (slowCase); - } + // For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available. + if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { + inline_check_hashcode_from_object_header(masm, method, rcx /*obj_reg*/, rax /*result*/); + } #endif // COMPILER1 // The instruction at the verified entry point must be 5 bytes or longer diff -r 954c49c0ba57 -r 692adc3fa1b5 hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Feb 16 09:49:58 2016 -0800 +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Wed Feb 17 12:24:53 2016 +0100 @@ -2058,6 +2058,13 @@ int vep_offset = ((intptr_t)__ pc()) - start; +#ifdef COMPILER1 + // For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available. + if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { + inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); + } +#endif // COMPILER1 + // The instruction at the verified entry point must be 5 bytes or longer // because it can be patched on the fly by make_non_entrant.
The stack bang // instruction fits that requirement. diff -r 954c49c0ba57 -r 692adc3fa1b5 hotspot/src/share/vm/runtime/sharedRuntime.hpp --- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp Tue Feb 16 09:49:58 2016 -0800 +++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp Wed Feb 17 12:24:53 2016 +0100 @@ -359,6 +359,11 @@ static address clean_opt_virtual_call_entry(); static address clean_static_call_entry(); +#if defined(X86) && defined(COMPILER1) + // For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available. + static void inline_check_hashcode_from_object_header(MacroAssembler* masm, methodHandle method, Register obj_reg, Register result); +#endif // X86 && COMPILER1 + public: // Read the array of BasicTypes from a Java signature, and compute where