6378256: Performance problem with System.identityHashCode in client compiler
Summary: Enabled a C1 optimization that tries to pull the hashCode out of the object header before calling into the VM.
Reviewed-by: dlong, roland, thartmann
Contributed-by: Rahul Raghavan <rahul.v.raghavan@oracle.com>
--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Tue Feb 16 09:49:58 2016 -0800
+++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Wed Feb 17 12:24:53 2016 +0100
@@ -2015,23 +2015,33 @@
int vep_offset = ((intptr_t)__ pc()) - start;
#ifdef COMPILER1
- if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
- // Object.hashCode can pull the hashCode from the header word
- // instead of doing a full VM transition once it's been computed.
- // Since hashCode is usually polymorphic at call sites we can't do
- // this optimization at the call site without a lot of work.
+ if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
+ // Object.hashCode and System.identityHashCode can pull the hashCode from the
+ // header word instead of doing a full VM transition once it's been computed.
+ // Since hashCode is usually polymorphic at call sites we can't do this
+ // optimization at the call site without a lot of work.
Label slowCase;
- Register receiver = O0;
+ Label done;
+ Register obj_reg = O0;
Register result = O0;
Register header = G3_scratch;
Register hash = G3_scratch; // overwrite header value with hash value
Register mask = G1; // to get hash field from header
+ // Unlike Object.hashCode, System.identityHashCode is a static method and
+ // gets the object as an argument instead of as the receiver.
+ if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
+ assert(method->is_static(), "method should be static");
+ // return 0 for null reference input
+ __ br_null(obj_reg, false, Assembler::pn, done);
+ __ delayed()->mov(obj_reg, hash);
+ }
+
// Read the header and build a mask to get its hash field. Give up if the object is not unlocked.
// We depend on hash_mask being at most 32 bits and avoid the use of
// hash_mask_in_place because it could be larger than 32 bits in a 64-bit
// vm: see markOop.hpp.
- __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
+ __ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header);
__ sethi(markOopDesc::hash_mask, mask);
__ btst(markOopDesc::unlocked_value, header);
__ br(Assembler::zero, false, Assembler::pn, slowCase);
@@ -2054,6 +2064,7 @@
__ delayed()->nop();
// leaf return.
+ __ bind(done);
__ retl();
__ delayed()->mov(hash, result);
__ bind(slowCase);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86.cpp Wed Feb 17 12:24:53 2016 +0100
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_x86.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif //COMPILER1
+
+#define __ masm->
+
+#ifdef COMPILER1
+// ---------------------------------------------------------------------------
+// Emits a fast path for Object.hashCode and System.identityHashCode that reads
+// the hash from the object's header word, returning directly when one is stored,
+// instead of doing a full VM transition. hashCode is usually polymorphic at call
+// sites, so doing this at the call site would take a lot of work.
+void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* masm, // assembler the fast path is emitted through
+ methodHandle method, // its intrinsic id decides whether the null check is emitted
+ Register obj_reg, // register holding the object reference
+ Register result) { // register for the returned hash; holds the header word meanwhile
+ Label slowCase;
+
+ // Unlike Object.hashCode, System.identityHashCode is a static method and
+ // gets the object as an argument instead of as the receiver.
+ if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
+ Label Continue;
+ // A null reference yields 0: zero the result register and return right away.
+ __ cmpptr(obj_reg, (int32_t)NULL_WORD);
+ __ jcc(Assembler::notEqual, Continue);
+ __ xorptr(result, result);
+ __ ret(0);
+ __ bind(Continue);
+ }
+
+ __ movptr(result, Address(obj_reg, oopDesc::mark_offset_in_bytes())); // load the object's mark (header) word
+
+ // Take the slow path unless the unlocked bit is set.
+ __ testptr(result, markOopDesc::unlocked_value);
+ __ jcc(Assembler::zero, slowCase);
+
+ if (UseBiasedLocking) {
+ // If the biased-lock bit is set, take the slow path.
+ __ testptr(result, markOopDesc::biased_lock_bit_in_place);
+ __ jcc(Assembler::notZero, slowCase);
+ }
+
+ // Isolate the hash field of the header word.
+#ifdef _LP64
+ // Shift first, then mask: we depend on hash_mask being at most 32 bits and avoid
+ // hash_mask_in_place, which could be larger than 32 bits in a 64-bit VM.
+ // See markOop.hpp.
+ __ shrptr(result, markOopDesc::hash_shift);
+ __ andptr(result, markOopDesc::hash_mask);
+#else
+ __ andptr(result, markOopDesc::hash_mask_in_place);
+#endif //_LP64
+
+ // A zero hash field means no hashCode exists yet; the flags tested here were set by the andptr above.
+ __ jcc(Assembler::zero, slowCase);
+#ifndef _LP64
+ __ shrptr(result, markOopDesc::hash_shift); // 32-bit: the mask was applied in place, so shift the hash down now
+#endif
+ __ ret(0);
+ __ bind(slowCase); // slow path: continue with the code the caller emits after this helper
+}
+#endif //COMPILER1
+
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Feb 16 09:49:58 2016 -0800
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Wed Feb 17 12:24:53 2016 +0100
@@ -1754,34 +1754,10 @@
int vep_offset = ((intptr_t)__ pc()) - start;
#ifdef COMPILER1
- if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
- // Object.hashCode can pull the hashCode from the header word
- // instead of doing a full VM transition once it's been computed.
- // Since hashCode is usually polymorphic at call sites we can't do
- // this optimization at the call site without a lot of work.
- Label slowCase;
- Register receiver = rcx;
- Register result = rax;
- __ movptr(result, Address(receiver, oopDesc::mark_offset_in_bytes()));
-
- // check if locked
- __ testptr(result, markOopDesc::unlocked_value);
- __ jcc (Assembler::zero, slowCase);
-
- if (UseBiasedLocking) {
- // Check if biased and fall through to runtime if so
- __ testptr(result, markOopDesc::biased_lock_bit_in_place);
- __ jcc (Assembler::notZero, slowCase);
- }
-
- // get hash
- __ andptr(result, markOopDesc::hash_mask_in_place);
- // test if hashCode exists
- __ jcc (Assembler::zero, slowCase);
- __ shrptr(result, markOopDesc::hash_shift);
- __ ret(0);
- __ bind (slowCase);
- }
+ // For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available.
+ if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
+ inline_check_hashcode_from_object_header(masm, method, rcx /*obj_reg*/, rax /*result*/);
+ }
#endif // COMPILER1
// The instruction at the verified entry point must be 5 bytes or longer
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Feb 16 09:49:58 2016 -0800
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Wed Feb 17 12:24:53 2016 +0100
@@ -2058,6 +2058,13 @@
int vep_offset = ((intptr_t)__ pc()) - start;
+#ifdef COMPILER1
+ // For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available.
+ if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
+ inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/);
+ }
+#endif // COMPILER1
+
// The instruction at the verified entry point must be 5 bytes or longer
// because it can be patched on the fly by make_non_entrant. The stack bang
// instruction fits that requirement.
--- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp Tue Feb 16 09:49:58 2016 -0800
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp Wed Feb 17 12:24:53 2016 +0100
@@ -359,6 +359,11 @@
static address clean_opt_virtual_call_entry();
static address clean_static_call_entry();
+#if defined(X86) && defined(COMPILER1)
+ // For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available.
+ static void inline_check_hashcode_from_object_header(MacroAssembler* masm, methodHandle method, Register obj_reg, Register result);
+#endif // X86 && COMPILER1
+
public:
// Read the array of BasicTypes from a Java signature, and compute where