--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Tue Feb 24 09:53:20 2009 -0800
@@ -0,0 +1,514 @@
+/*
+ * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_vm_version_x86.cpp.incl"
+
+
+int VM_Version::_cpu; // processor family; set in get_processor_features()
+int VM_Version::_model; // processor model; set in get_processor_features()
+int VM_Version::_stepping; // processor stepping; set in get_processor_features()
+int VM_Version::_cpuFeatures; // bitmask of CPU_* feature flags
+const char* VM_Version::_features_str = ""; // printable summary built in get_processor_features()
+VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; // raw cpuid results written by getPsrInfo_stub
+
+static BufferBlob* stub_blob; // blob that holds the generated getPsrInfo stub
+static const int stub_size = 300; // size requested from BufferBlob::create() for the stub
+
+extern "C" {
+ typedef void (*getPsrInfo_stub_t)(void*); // stub signature; the argument is the CpuidInfo address
+}
+static getPsrInfo_stub_t getPsrInfo_stub = NULL; // stub entry point; assigned in VM_Version::initialize()
+
+
+class VM_Version_StubGenerator: public StubCodeGenerator { // emits the getPsrInfo stub that fills a VM_Version::CpuidInfo
+ public:
+
+ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+ // Generates the getPsrInfo stub into the code buffer and returns the address where it starts.
+ address generate_getPsrInfo() {
+ // EFLAGS bits toggled below to tell 386, 486 and cpuid-capable CPUs apart.
+ const uint32_t EFL_AC = 0x40000;
+ const uint32_t EFL_ID = 0x200000;
+ // Values for when we don't have a CPUID instruction.
+ const int CPU_FAMILY_SHIFT = 8;
+ const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
+ const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
+
+ Label detect_486, cpu486, detect_586, std_cpuid1;
+ Label ext_cpuid1, ext_cpuid5, done;
+
+ StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
+# define __ _masm->
+
+ address start = __ pc();
+
+ // The generated code is called through getPsrInfo_stub_t as:
+ // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
+ // rbp holds cpuid_info for the whole stub; rsi points at the sub-block being written.
+ // LP64: rcx and rdx are first and second argument registers on windows
+
+ __ push(rbp);
+#ifdef _LP64
+ __ mov(rbp, c_rarg0); // cpuid_info address
+#else
+ __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
+#endif
+ __ push(rbx);
+ __ push(rsi);
+ __ pushf(); // preserve rbx, rsi, and flags
+ __ pop(rax); // rax = current flags
+ __ push(rax); // leave a copy on the stack for the popf at 'done'
+ __ mov(rcx, rax); // rcx = original flags, used for the comparisons below
+ //
+ // if we are unable to change the AC flag, we have a 386
+ //
+ __ xorl(rax, EFL_AC);
+ __ push(rax);
+ __ popf();
+ __ pushf();
+ __ pop(rax);
+ __ cmpptr(rax, rcx);
+ __ jccb(Assembler::notEqual, detect_486);
+
+ __ movl(rax, CPU_FAMILY_386);
+ __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); // synthesize cpuid(1).eax with family = 3
+ __ jmp(done);
+
+ //
+ // If we are unable to change the ID flag, we have a 486 which does
+ // not support the "cpuid" instruction.
+ //
+ __ bind(detect_486);
+ __ mov(rax, rcx);
+ __ xorl(rax, EFL_ID);
+ __ push(rax);
+ __ popf();
+ __ pushf();
+ __ pop(rax);
+ __ cmpptr(rcx, rax);
+ __ jccb(Assembler::notEqual, detect_586);
+
+ __ bind(cpu486);
+ __ movl(rax, CPU_FAMILY_486);
+ __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); // synthesize cpuid(1).eax with family = 4
+ __ jmp(done);
+
+ //
+ // At this point, we have a chip which supports the "cpuid" instruction
+ //
+ __ bind(detect_586);
+ __ xorl(rax, rax); // cpuid function 0
+ __ cpuid();
+ __ orl(rax, rax); // sets ZF when cpuid(0) returned eax == 0
+ __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
+ // value of at least 1, we give up and
+ // assume a 486
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
+ __ cmpl(rax, 3); // Is cpuid(0x4) supported?
+ __ jccb(Assembler::belowEqual, std_cpuid1);
+
+ //
+ // cpuid(0x4) Deterministic cache params
+ //
+ __ movl(rax, 4);
+ __ xorl(rcx, rcx); // L1 cache
+ __ cpuid();
+ __ push(rax); // save eax; the validity test below destroys it
+ __ andl(rax, 0x1f); // Determine if valid cache parameters used
+ __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache
+ __ pop(rax); // restore eax before the branch that uses the flags set above
+ __ jccb(Assembler::equal, std_cpuid1);
+
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
+ //
+ // Standard cpuid(0x1)
+ //
+ __ bind(std_cpuid1);
+ __ movl(rax, 1);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
+ __ movl(rax, 0x80000000); // result is only compared below, never stored
+ __ cpuid();
+ __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
+ __ jcc(Assembler::belowEqual, done);
+ __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
+ __ jccb(Assembler::belowEqual, ext_cpuid1);
+ __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
+ __ jccb(Assembler::belowEqual, ext_cpuid5);
+ //
+ // Extended cpuid(0x80000008)
+ // (falls through into the 0x80000005 and 0x80000001 queries below)
+ __ movl(rax, 0x80000008);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
+ //
+ // Extended cpuid(0x80000005)
+ // (falls through into the 0x80000001 query below)
+ __ bind(ext_cpuid5);
+ __ movl(rax, 0x80000005);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
+ //
+ // Extended cpuid(0x80000001)
+ //
+ __ bind(ext_cpuid1);
+ __ movl(rax, 0x80000001);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
+ //
+ // return: undo the pushes from the prologue in reverse order
+ //
+ __ bind(done);
+ __ popf();
+ __ pop(rsi);
+ __ pop(rbx);
+ __ pop(rbp);
+ __ ret(0);
+
+# undef __
+
+ return start;
+ };
+};
+
+
+void VM_Version::get_processor_features() {
+ // Runs the cpuid stub, derives family/model/feature bits, then adjusts VM flags to what the CPU and OS support.
+ _cpu = 4; // 486 by default
+ _model = 0;
+ _stepping = 0;
+ _cpuFeatures = 0;
+ _logical_processors_per_package = 1;
+
+ if (!Use486InstrsOnly) {
+ // Get raw processor info
+ getPsrInfo_stub(&_cpuid_info);
+ assert_is_initialized();
+ _cpu = extended_cpu_family();
+ _model = extended_cpu_model();
+ _stepping = cpu_stepping();
+
+ if (cpu_family() > 4) { // it supports CPUID
+ _cpuFeatures = feature_flags();
+ // Logical processors are only available on P4s and above,
+ // and only if hyperthreading is available.
+ _logical_processors_per_package = logical_processor_count();
+ }
+ }
+
+ _supports_cx8 = supports_cmpxchg8();
+
+#ifdef _LP64
+ // OS should support SSE for x64 and hardware should support at least SSE2.
+ if (!VM_Version::supports_sse2()) {
+ vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
+ }
+#endif
+
+ // If the OS doesn't support SSE, we can't use this feature even if the HW does
+ if (!os::supports_sse())
+ _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
+
+ if (UseSSE < 4) { // the requested UseSSE level also masks the feature bits above it
+ _cpuFeatures &= ~CPU_SSE4_1;
+ _cpuFeatures &= ~CPU_SSE4_2;
+ }
+
+ if (UseSSE < 3) {
+ _cpuFeatures &= ~CPU_SSE3;
+ _cpuFeatures &= ~CPU_SSSE3;
+ _cpuFeatures &= ~CPU_SSE4A;
+ }
+
+ if (UseSSE < 2)
+ _cpuFeatures &= ~CPU_SSE2;
+
+ if (UseSSE < 1)
+ _cpuFeatures &= ~CPU_SSE;
+
+ if (logical_processors_per_package() == 1) {
+ // HT processor could be installed on a system which doesn't support HT.
+ _cpuFeatures &= ~CPU_HT;
+ }
+
+ char buf[256];
+ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ cores_per_cpu(), threads_per_core(),
+ cpu_family(), _model, _stepping,
+ (supports_cmov() ? ", cmov" : ""),
+ (supports_cmpxchg8() ? ", cx8" : ""),
+ (supports_fxsr() ? ", fxsr" : ""),
+ (supports_mmx() ? ", mmx" : ""),
+ (supports_sse() ? ", sse" : ""),
+ (supports_sse2() ? ", sse2" : ""),
+ (supports_sse3() ? ", sse3" : ""),
+ (supports_ssse3()? ", ssse3": ""),
+ (supports_sse4_1() ? ", sse4.1" : ""),
+ (supports_sse4_2() ? ", sse4.2" : ""),
+ (supports_mmx_ext() ? ", mmxext" : ""),
+ (supports_3dnow() ? ", 3dnow" : ""),
+ (supports_3dnow2() ? ", 3dnowext" : ""),
+ (supports_sse4a() ? ", sse4a": ""),
+ (supports_ht() ? ", ht": ""));
+ _features_str = strdup(buf); // keep a heap copy; buf is a local array
+
+ // UseSSE is set to the smaller of what hardware supports and what
+ // the command line requires. I.e., you cannot set UseSSE to 2 on
+ // older Pentiums which do not support it.
+ if( UseSSE > 4 ) UseSSE=4;
+ if( UseSSE < 0 ) UseSSE=0;
+ if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
+ UseSSE = MIN2((intx)3,UseSSE);
+ if( !supports_sse3() ) // Drop to 2 if no SSE3 support
+ UseSSE = MIN2((intx)2,UseSSE);
+ if( !supports_sse2() ) // Drop to 1 if no SSE2 support
+ UseSSE = MIN2((intx)1,UseSSE);
+ if( !supports_sse () ) // Drop to 0 if no SSE support
+ UseSSE = 0;
+
+ // On new cpus instructions which update whole XMM register should be used
+ // to prevent partial register stall due to dependencies on high half.
+ //
+ // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
+ // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
+ // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
+ // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
+
+ if( is_amd() ) { // AMD cpus specific settings
+ if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
+ // Use it on new AMD cpus starting from Opteron.
+ UseAddressNop = true;
+ }
+ if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
+ // Use it on new AMD cpus starting from Opteron.
+ UseNewLongLShift = true;
+ }
+ if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
+ if( supports_sse4a() ) {
+ UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
+ } else {
+ UseXmmLoadAndClearUpper = false;
+ }
+ }
+ if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
+ if( supports_sse4a() ) {
+ UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
+ } else {
+ UseXmmRegToRegMoveAll = false;
+ }
+ }
+ if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
+ if( supports_sse4a() ) {
+ UseXmmI2F = true;
+ } else {
+ UseXmmI2F = false;
+ }
+ }
+ if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
+ if( supports_sse4a() ) {
+ UseXmmI2D = true;
+ } else {
+ UseXmmI2D = false;
+ }
+ }
+ }
+
+ if( is_intel() ) { // Intel cpus specific settings
+ if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
+ UseStoreImmI16 = false; // don't use it on Intel cpus
+ }
+ if( cpu_family() == 6 || cpu_family() == 15 ) {
+ if( FLAG_IS_DEFAULT(UseAddressNop) ) {
+ // Use it on all Intel cpus starting from PentiumPro
+ UseAddressNop = true;
+ }
+ }
+ if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
+ UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
+ }
+ if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
+ if( supports_sse3() ) {
+ UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
+ } else {
+ UseXmmRegToRegMoveAll = false;
+ }
+ }
+ if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
+#ifdef COMPILER2
+ if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
+ // For new Intel cpus do the next optimization:
+ // don't align the beginning of a loop if there are enough instructions
+ // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
+ // in current fetch line (OptoLoopAlignment) or the padding
+ // is big (> MaxLoopPad).
+ // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
+ // generated NOP instructions. 11 is the largest size of one
+ // address NOP instruction '0F 1F' (see Assembler::nop(i)).
+ MaxLoopPad = 11;
+ }
+#endif // COMPILER2
+ if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+ UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+ }
+ if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+ if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+ UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+ }
+ }
+ }
+ }
+
+ assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
+ assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
+
+ // set valid Prefetch instruction
+ if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
+ if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
+ if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
+ if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3;
+
+ if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
+ if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
+ if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
+ if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3;
+
+ // Allocation prefetch settings
+ intx cache_line_size = L1_data_cache_line_size();
+ if( cache_line_size > AllocatePrefetchStepSize )
+ AllocatePrefetchStepSize = cache_line_size;
+ if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
+ AllocatePrefetchLines = 3; // Optimistic value
+ assert(AllocatePrefetchLines > 0, "invalid value");
+ if( AllocatePrefetchLines < 1 ) // set valid value in product VM
+ AllocatePrefetchLines = 1; // Conservative value
+
+ AllocatePrefetchDistance = allocate_prefetch_distance();
+ AllocatePrefetchStyle = allocate_prefetch_style();
+
+ if( AllocatePrefetchStyle == 2 && is_intel() &&
+ cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
+#ifdef _LP64
+ AllocatePrefetchDistance = 384;
+#else
+ AllocatePrefetchDistance = 320;
+#endif
+ }
+ assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
+
+#ifdef _LP64
+ // Prefetch settings
+ PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
+ PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
+ PrefetchFieldsAhead = prefetch_fields_ahead();
+#endif
+
+#ifndef PRODUCT
+ if (PrintMiscellaneous && Verbose) { // intx flags are cast to int below so the arguments match the %d conversions
+ tty->print_cr("Logical CPUs per core: %u",
+ logical_processors_per_package());
+ tty->print_cr("UseSSE=%d",(int) UseSSE);
+ tty->print("Allocation: ");
+ if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow())) {
+ tty->print_cr("no prefetching");
+ } else {
+ if (UseSSE == 0 && supports_3dnow()) {
+ tty->print("PREFETCHW");
+ } else if (UseSSE >= 1) {
+ if (AllocatePrefetchInstr == 0) {
+ tty->print("PREFETCHNTA");
+ } else if (AllocatePrefetchInstr == 1) {
+ tty->print("PREFETCHT0");
+ } else if (AllocatePrefetchInstr == 2) {
+ tty->print("PREFETCHT2");
+ } else if (AllocatePrefetchInstr == 3) {
+ tty->print("PREFETCHW");
+ }
+ }
+ if (AllocatePrefetchLines > 1) {
+ tty->print_cr(" %d, %d lines with step %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
+ } else {
+ tty->print_cr(" %d, one line", (int) AllocatePrefetchDistance);
+ }
+ }
+
+ if (PrefetchCopyIntervalInBytes > 0) {
+ tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
+ }
+ if (PrefetchScanIntervalInBytes > 0) {
+ tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
+ }
+ if (PrefetchFieldsAhead > 0) {
+ tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
+ }
+ }
+#endif // !PRODUCT
+}
+
+void VM_Version::initialize() {
+ ResourceMark rm;
+ // The cpuid stub has to be generated before anything else uses the assembler.
+
+ stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
+ if (stub_blob == NULL) {
+ vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
+ }
+ CodeBuffer stub_code(stub_blob->instructions_begin(),
+ stub_blob->instructions_size());
+ VM_Version_StubGenerator stub_gen(&stub_code);
+ address stub_entry = stub_gen.generate_getPsrInfo(); // start address of the emitted stub
+ getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, stub_entry);
+
+ get_processor_features();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Tue Feb 24 09:53:20 2009 -0800
@@ -0,0 +1,459 @@
+/*
+ * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class VM_Version : public Abstract_VM_Version { // x86 CPU identification, feature flags and prefetch defaults
+public:
+ // cpuid result register layouts. These are all unions of a uint32_t
+ // (in case anyone wants access to the register as a whole) and a bitfield.
+
+ union StdCpuid1Eax {
+ uint32_t value;
+ struct {
+ uint32_t stepping : 4,
+ model : 4,
+ family : 4,
+ proc_type : 2,
+ : 2,
+ ext_model : 4,
+ ext_family : 8,
+ : 4;
+ } bits;
+ };
+
+ union StdCpuid1Ebx { // threads_per_cpu is read by threads_per_core()
+ uint32_t value;
+ struct {
+ uint32_t brand_id : 8,
+ clflush_size : 8,
+ threads_per_cpu : 8,
+ apic_id : 8;
+ } bits;
+ };
+
+ union StdCpuid1Ecx {
+ uint32_t value;
+ struct {
+ uint32_t sse3 : 1,
+ : 2,
+ monitor : 1,
+ : 1,
+ vmx : 1,
+ : 1,
+ est : 1,
+ : 1,
+ ssse3 : 1,
+ cid : 1,
+ : 2,
+ cmpxchg16: 1,
+ : 4,
+ dca : 1,
+ sse4_1 : 1,
+ sse4_2 : 1,
+ : 11;
+ } bits;
+ };
+
+ union StdCpuid1Edx {
+ uint32_t value;
+ struct {
+ uint32_t : 4,
+ tsc : 1,
+ : 3,
+ cmpxchg8 : 1,
+ : 6,
+ cmov : 1,
+ : 7,
+ mmx : 1,
+ fxsr : 1,
+ sse : 1,
+ sse2 : 1,
+ : 1,
+ ht : 1,
+ : 3;
+ } bits;
+ };
+
+ union DcpCpuid4Eax {
+ uint32_t value;
+ struct {
+ uint32_t cache_type : 5,
+ : 21,
+ cores_per_cpu : 6;
+ } bits;
+ };
+
+ union DcpCpuid4Ebx {
+ uint32_t value;
+ struct {
+ uint32_t L1_line_size : 12,
+ partitions : 10,
+ associativity : 10;
+ } bits;
+ };
+
+ union ExtCpuid1Ecx {
+ uint32_t value;
+ struct {
+ uint32_t LahfSahf : 1,
+ CmpLegacy : 1,
+ : 4,
+ abm : 1,
+ sse4a : 1,
+ misalignsse : 1,
+ prefetchw : 1,
+ : 22;
+ } bits;
+ };
+
+ union ExtCpuid1Edx {
+ uint32_t value;
+ struct {
+ uint32_t : 22,
+ mmx_amd : 1,
+ mmx : 1,
+ fxsr : 1,
+ : 4,
+ long_mode : 1,
+ tdnow2 : 1,
+ tdnow : 1;
+ } bits;
+ };
+
+ union ExtCpuid5Ex {
+ uint32_t value;
+ struct {
+ uint32_t L1_line_size : 8,
+ L1_tag_lines : 8,
+ L1_assoc : 8,
+ L1_size : 8;
+ } bits;
+ };
+
+ union ExtCpuid8Ecx {
+ uint32_t value;
+ struct {
+ uint32_t cores_per_cpu : 8,
+ : 24;
+ } bits;
+ };
+
+protected:
+ static int _cpu;
+ static int _model;
+ static int _stepping;
+ static int _cpuFeatures; // features returned by the "cpuid" instruction
+ // 0 if this instruction is not available
+ static const char* _features_str;
+
+ enum {
+ CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX)
+ CPU_CMOV = (1 << 1),
+ CPU_FXSR = (1 << 2),
+ CPU_HT = (1 << 3),
+ CPU_MMX = (1 << 4),
+ CPU_3DNOW = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
+ CPU_SSE = (1 << 6),
+ CPU_SSE2 = (1 << 7),
+ CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
+ CPU_SSSE3 = (1 << 9),
+ CPU_SSE4A = (1 << 10),
+ CPU_SSE4_1 = (1 << 11),
+ CPU_SSE4_2 = (1 << 12)
+ } cpuFeatureFlags;
+
+ // cpuid information block. All info derived from executing cpuid with
+ // various function numbers is stored here. Intel and AMD info is
+ // merged in this block: accessor methods disentangle it.
+ //
+ // The info block is laid out in subblocks of 4 dwords corresponding to
+ // eax, ebx, ecx and edx, whether or not they contain anything useful.
+ struct CpuidInfo {
+ // cpuid function 0
+ uint32_t std_max_function;
+ uint32_t std_vendor_name_0;
+ uint32_t std_vendor_name_1;
+ uint32_t std_vendor_name_2;
+
+ // cpuid function 1
+ StdCpuid1Eax std_cpuid1_eax;
+ StdCpuid1Ebx std_cpuid1_ebx;
+ StdCpuid1Ecx std_cpuid1_ecx;
+ StdCpuid1Edx std_cpuid1_edx;
+
+ // cpuid function 4 (deterministic cache parameters)
+ DcpCpuid4Eax dcp_cpuid4_eax;
+ DcpCpuid4Ebx dcp_cpuid4_ebx;
+ uint32_t dcp_cpuid4_ecx; // unused currently
+ uint32_t dcp_cpuid4_edx; // unused currently
+
+ // cpuid function 0x80000000 // example, unused
+ uint32_t ext_max_function;
+ uint32_t ext_vendor_name_0;
+ uint32_t ext_vendor_name_1;
+ uint32_t ext_vendor_name_2;
+
+ // cpuid function 0x80000001
+ uint32_t ext_cpuid1_eax; // reserved
+ uint32_t ext_cpuid1_ebx; // reserved
+ ExtCpuid1Ecx ext_cpuid1_ecx;
+ ExtCpuid1Edx ext_cpuid1_edx;
+
+ // cpuid functions 0x80000002 thru 0x80000004: example, unused
+ uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
+ uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
+ uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
+
+ // cpuid function 0x80000005 //AMD L1, Intel reserved
+ uint32_t ext_cpuid5_eax; // unused currently
+ uint32_t ext_cpuid5_ebx; // reserved
+ ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD)
+ ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD)
+
+ // cpuid function 0x80000008
+ uint32_t ext_cpuid8_eax; // unused currently
+ uint32_t ext_cpuid8_ebx; // reserved
+ ExtCpuid8Ecx ext_cpuid8_ecx;
+ uint32_t ext_cpuid8_edx; // reserved
+ };
+
+ // The actual cpuid info block
+ static CpuidInfo _cpuid_info;
+
+ // Extractors and predicates
+ static uint32_t extended_cpu_family() { // base family field plus the extended family field
+ uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
+ result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
+ return result;
+ }
+ static uint32_t extended_cpu_model() { // extended model field supplies bits 4..7 of the result
+ uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
+ result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
+ return result;
+ }
+ static uint32_t cpu_stepping() {
+ uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
+ return result;
+ }
+ static uint logical_processor_count() { // currently just threads_per_core()
+ uint result = threads_per_core();
+ return result;
+ }
+ static uint32_t feature_flags() { // translates the raw cpuid bits into a CPU_* bitmask
+ uint32_t result = 0;
+ if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
+ result |= CPU_CX8;
+ if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
+ result |= CPU_CMOV;
+ if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd() &&
+ _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
+ result |= CPU_FXSR;
+ // HT flag is set for multi-core processors also.
+ if (threads_per_core() > 1)
+ result |= CPU_HT;
+ if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd() &&
+ _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
+ result |= CPU_MMX;
+ if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
+ result |= CPU_3DNOW;
+ if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
+ result |= CPU_SSE;
+ if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
+ result |= CPU_SSE2;
+ if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
+ result |= CPU_SSE3;
+ if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
+ result |= CPU_SSSE3;
+ if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
+ result |= CPU_SSE4A;
+ if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
+ result |= CPU_SSE4_1;
+ if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
+ result |= CPU_SSE4_2;
+ return result;
+ }
+
+ static void get_processor_features();
+
+public:
+ // Offsets for cpuid asm stub
+ static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
+ static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
+ static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
+ static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
+ static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
+ static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+
+ // Initialization
+ static void initialize();
+
+ // Asserts
+ static void assert_is_initialized() { // a zero family field means the cpuid block has not been filled in
+ assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
+ }
+
+ //
+ // Processor family:
+ // 3 - 386
+ // 4 - 486
+ // 5 - Pentium
+ // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
+ // Pentium M, Core Solo, Core Duo, Core2 Duo
+ // family 6 model: 9, 13, 14, 15
+ // 0x0f - Pentium 4, Opteron
+ //
+ // Note: The cpu family should be used to select between
+ // instruction sequences which are valid on all Intel
+ // processors. Use the feature test functions below to
+ // determine whether a particular instruction is supported.
+ //
+ static int cpu_family() { return _cpu;}
+ static bool is_P6() { return cpu_family() >= 6; }
+
+ static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
+ static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
+
+ static uint cores_per_cpu() { // Intel: from cpuid(4) eax; AMD: from cpuid(0x80000008) ecx; otherwise 1
+ uint result = 1;
+ if (is_intel()) {
+ result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+ } else if (is_amd()) {
+ result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
+ }
+ return result;
+ }
+
+ static uint threads_per_core() { // logical processors per core; always at least 1
+ uint result = 1;
+ if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
+ result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+ cores_per_cpu();
+ }
+ return (result == 0 ? 1 : result); // the division truncates to 0 when threads_per_cpu < cores_per_cpu()
+ }
+
+ static intx L1_data_cache_line_size() {
+ intx result = 0;
+ if (is_intel()) {
+ result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
+ } else if (is_amd()) {
+ result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
+ }
+ if (result < 32) // not defined ?
+ result = 32; // 32 bytes by default on x86 and other x64
+ return result;
+ }
+
+ //
+ // Feature identification
+ //
+ static bool supports_cpuid() { return _cpuFeatures != 0; }
+ static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
+ static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; }
+ static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; }
+ static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; }
+ static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; }
+ static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; }
+ static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; }
+ static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; }
+ static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
+ static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+ static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
+ //
+ // AMD features
+ //
+ static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; }
+ static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
+ static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
+ static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; }
+
+ static bool supports_compare_and_exchange() { return true; }
+
+ static const char* cpu_features() { return _features_str; }
+
+ static intx allocate_prefetch_distance() {
+ // This method should be called before allocate_prefetch_style().
+ //
+ // Hardware prefetching (distance/size in bytes):
+ // Pentium 3 - 64 / 32
+ // Pentium 4 - 256 / 128
+ // Athlon - 64 / 32 ????
+ // Opteron - 128 / 64 only when 2 sequential cache lines accessed
+ // Core - 128 / 64
+ //
+ // Software prefetching (distance in bytes / instruction with best score):
+ // Pentium 3 - 128 / prefetchnta
+ // Pentium 4 - 512 / prefetchnta
+ // Athlon - 128 / prefetchnta
+ // Opteron - 256 / prefetchnta
+ // Core - 256 / prefetchnta
+ // It will be used only when AllocatePrefetchStyle > 0
+
+ intx count = AllocatePrefetchDistance;
+ if (count < 0) { // default ?
+ if (is_amd()) { // AMD
+ if (supports_sse2())
+ count = 256; // Opteron
+ else
+ count = 128; // Athlon
+ } else { // Intel
+ if (supports_sse2())
+ if (cpu_family() == 6) {
+ count = 256; // Pentium M, Core, Core2
+ } else {
+ count = 512; // Pentium 4
+ }
+ else
+ count = 128; // Pentium 3 (and all other old CPUs)
+ }
+ }
+ return count;
+ }
+ static intx allocate_prefetch_style() {
+ assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
+ // Return 0 if AllocatePrefetchDistance was not defined.
+ return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
+ }
+
+ // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
+ // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
+ // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
+ // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
+
+ // gc copy/scan is disabled if prefetchw isn't supported, because
+ // Prefetch::write emits an inlined prefetchw on Linux.
+ // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
+ // The used prefetcht0 instruction works for both amd64 and em64t.
+ static intx prefetch_copy_interval_in_bytes() { // flag value if non-negative, else 576
+ intx interval = PrefetchCopyIntervalInBytes;
+ return interval >= 0 ? interval : 576;
+ }
+ static intx prefetch_scan_interval_in_bytes() { // flag value if non-negative, else 576
+ intx interval = PrefetchScanIntervalInBytes;
+ return interval >= 0 ? interval : 576;
+ }
+ static intx prefetch_fields_ahead() { // flag value if non-negative, else 1
+ intx count = PrefetchFieldsAhead;
+ return count >= 0 ? count : 1;
+ }
+};
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp Fri Feb 20 11:12:26 2009 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,472 +0,0 @@
-/*
- * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-# include "incls/_precompiled.incl"
-# include "incls/_vm_version_x86_32.cpp.incl"
-
-
-int VM_Version::_cpu;
-int VM_Version::_model;
-int VM_Version::_stepping;
-int VM_Version::_cpuFeatures;
-const char* VM_Version::_features_str = "";
-VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
-
-static BufferBlob* stub_blob;
-static const int stub_size = 300;
-
-extern "C" {
- typedef void (*getPsrInfo_stub_t)(void*);
-}
-static getPsrInfo_stub_t getPsrInfo_stub = NULL;
-
-
-class VM_Version_StubGenerator: public StubCodeGenerator {
- public:
-
- VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
-
- address generate_getPsrInfo() {
- // Flags to test CPU type.
- const uint32_t EFL_AC = 0x40000;
- const uint32_t EFL_ID = 0x200000;
- // Values for when we don't have a CPUID instruction.
- const int CPU_FAMILY_SHIFT = 8;
- const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
- const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
-
- Label detect_486, cpu486, detect_586, std_cpuid1;
- Label ext_cpuid1, ext_cpuid5, done;
-
- StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
-# define __ _masm->
-
- address start = __ pc();
-
- //
- // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
- //
- __ push(rbp);
- __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
- __ push(rbx);
- __ push(rsi);
- __ pushf(); // preserve rbx, and flags
- __ pop(rax);
- __ push(rax);
- __ mov(rcx, rax);
- //
- // if we are unable to change the AC flag, we have a 386
- //
- __ xorl(rax, EFL_AC);
- __ push(rax);
- __ popf();
- __ pushf();
- __ pop(rax);
- __ cmpptr(rax, rcx);
- __ jccb(Assembler::notEqual, detect_486);
-
- __ movl(rax, CPU_FAMILY_386);
- __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
- __ jmp(done);
-
- //
- // If we are unable to change the ID flag, we have a 486 which does
- // not support the "cpuid" instruction.
- //
- __ bind(detect_486);
- __ mov(rax, rcx);
- __ xorl(rax, EFL_ID);
- __ push(rax);
- __ popf();
- __ pushf();
- __ pop(rax);
- __ cmpptr(rcx, rax);
- __ jccb(Assembler::notEqual, detect_586);
-
- __ bind(cpu486);
- __ movl(rax, CPU_FAMILY_486);
- __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
- __ jmp(done);
-
- //
- // at this point, we have a chip which supports the "cpuid" instruction
- //
- __ bind(detect_586);
- __ xorptr(rax, rax);
- __ cpuid();
- __ orptr(rax, rax);
- __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
- // value of at least 1, we give up and
- // assume a 486
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- __ cmpl(rax, 3); // Is cpuid(0x4) supported?
- __ jccb(Assembler::belowEqual, std_cpuid1);
-
- //
- // cpuid(0x4) Deterministic cache params
- //
- __ movl(rax, 4); // and rcx already set to 0x0
- __ xorl(rcx, rcx);
- __ cpuid();
- __ push(rax);
- __ andl(rax, 0x1f); // Determine if valid cache parameters used
- __ orl(rax, rax); // rax,[4:0] == 0 indicates invalid cache
- __ pop(rax);
- __ jccb(Assembler::equal, std_cpuid1);
-
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // Standard cpuid(0x1)
- //
- __ bind(std_cpuid1);
- __ movl(rax, 1);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- __ movl(rax, 0x80000000);
- __ cpuid();
- __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
- __ jcc(Assembler::belowEqual, done);
- __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
- __ jccb(Assembler::belowEqual, ext_cpuid1);
- __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
- __ jccb(Assembler::belowEqual, ext_cpuid5);
- //
- // Extended cpuid(0x80000008)
- //
- __ movl(rax, 0x80000008);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // Extended cpuid(0x80000005)
- //
- __ bind(ext_cpuid5);
- __ movl(rax, 0x80000005);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // Extended cpuid(0x80000001)
- //
- __ bind(ext_cpuid1);
- __ movl(rax, 0x80000001);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // return
- //
- __ bind(done);
- __ popf();
- __ pop(rsi);
- __ pop(rbx);
- __ pop(rbp);
- __ ret(0);
-
-# undef __
-
- return start;
- };
-};
-
-
-void VM_Version::get_processor_features() {
-
- _cpu = 4; // 486 by default
- _model = 0;
- _stepping = 0;
- _cpuFeatures = 0;
- _logical_processors_per_package = 1;
- if (!Use486InstrsOnly) {
- // Get raw processor info
- getPsrInfo_stub(&_cpuid_info);
- assert_is_initialized();
- _cpu = extended_cpu_family();
- _model = extended_cpu_model();
- _stepping = cpu_stepping();
- if (cpu_family() > 4) { // it supports CPUID
- _cpuFeatures = feature_flags();
- // Logical processors are only available on P4s and above,
- // and only if hyperthreading is available.
- _logical_processors_per_package = logical_processor_count();
- }
- }
- _supports_cx8 = supports_cmpxchg8();
- // if the OS doesn't support SSE, we can't use this feature even if the HW does
- if( !os::supports_sse())
- _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
- if (UseSSE < 4) {
- _cpuFeatures &= ~CPU_SSE4_1;
- _cpuFeatures &= ~CPU_SSE4_2;
- }
- if (UseSSE < 3) {
- _cpuFeatures &= ~CPU_SSE3;
- _cpuFeatures &= ~CPU_SSSE3;
- _cpuFeatures &= ~CPU_SSE4A;
- }
- if (UseSSE < 2)
- _cpuFeatures &= ~CPU_SSE2;
- if (UseSSE < 1)
- _cpuFeatures &= ~CPU_SSE;
-
- if (logical_processors_per_package() == 1) {
- // HT processor could be installed on a system which doesn't support HT.
- _cpuFeatures &= ~CPU_HT;
- }
-
- char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
- cores_per_cpu(), threads_per_core(),
- cpu_family(), _model, _stepping,
- (supports_cmov() ? ", cmov" : ""),
- (supports_cmpxchg8() ? ", cx8" : ""),
- (supports_fxsr() ? ", fxsr" : ""),
- (supports_mmx() ? ", mmx" : ""),
- (supports_sse() ? ", sse" : ""),
- (supports_sse2() ? ", sse2" : ""),
- (supports_sse3() ? ", sse3" : ""),
- (supports_ssse3()? ", ssse3": ""),
- (supports_sse4_1() ? ", sse4.1" : ""),
- (supports_sse4_2() ? ", sse4.2" : ""),
- (supports_mmx_ext() ? ", mmxext" : ""),
- (supports_3dnow() ? ", 3dnow" : ""),
- (supports_3dnow2() ? ", 3dnowext" : ""),
- (supports_sse4a() ? ", sse4a": ""),
- (supports_ht() ? ", ht": ""));
- _features_str = strdup(buf);
-
- // UseSSE is set to the smaller of what hardware supports and what
- // the command line requires. I.e., you cannot set UseSSE to 2 on
- // older Pentiums which do not support it.
- if( UseSSE > 4 ) UseSSE=4;
- if( UseSSE < 0 ) UseSSE=0;
- if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
- UseSSE = MIN2((intx)3,UseSSE);
- if( !supports_sse3() ) // Drop to 2 if no SSE3 support
- UseSSE = MIN2((intx)2,UseSSE);
- if( !supports_sse2() ) // Drop to 1 if no SSE2 support
- UseSSE = MIN2((intx)1,UseSSE);
- if( !supports_sse () ) // Drop to 0 if no SSE support
- UseSSE = 0;
-
- // On new cpus instructions which update whole XMM register should be used
- // to prevent partial register stall due to dependencies on high half.
- //
- // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
- // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
- // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
- // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
-
- if( is_amd() ) { // AMD cpus specific settings
- if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
- // Use it on new AMD cpus starting from Opteron.
- UseAddressNop = true;
- }
- if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
- // Use it on new AMD cpus starting from Opteron.
- UseNewLongLShift = true;
- }
- if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
- if( supports_sse4a() ) {
- UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
- } else {
- UseXmmLoadAndClearUpper = false;
- }
- }
- if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
- if( supports_sse4a() ) {
- UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
- } else {
- UseXmmRegToRegMoveAll = false;
- }
- }
- if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
- if( supports_sse4a() ) {
- UseXmmI2F = true;
- } else {
- UseXmmI2F = false;
- }
- }
- if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
- if( supports_sse4a() ) {
- UseXmmI2D = true;
- } else {
- UseXmmI2D = false;
- }
- }
- }
-
- if( is_intel() ) { // Intel cpus specific settings
- if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
- UseStoreImmI16 = false; // don't use it on Intel cpus
- }
- if( cpu_family() == 6 || cpu_family() == 15 ) {
- if( FLAG_IS_DEFAULT(UseAddressNop) ) {
- // Use it on all Intel cpus starting from PentiumPro
- UseAddressNop = true;
- }
- }
- if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
- UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
- }
- if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
- if( supports_sse3() ) {
- UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
- } else {
- UseXmmRegToRegMoveAll = false;
- }
- }
- if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
-#ifdef COMPILER2
- if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
- // For new Intel cpus do the next optimization:
- // don't align the beginning of a loop if there are enough instructions
- // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
- // in current fetch line (OptoLoopAlignment) or the padding
- // is big (> MaxLoopPad).
- // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
- // generated NOP instructions. 11 is the largest size of one
- // address NOP instruction '0F 1F' (see Assembler::nop(i)).
- MaxLoopPad = 11;
- }
-#endif // COMPILER2
- if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
- UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
- }
- if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
- if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
- UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
- }
- }
- }
- }
-
- assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
- assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
-
- // set valid Prefetch instruction
- if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
- if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
- if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
- if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3;
-
- if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
- if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
- if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
- if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3;
-
- // Allocation prefetch settings
- intx cache_line_size = L1_data_cache_line_size();
- if( cache_line_size > AllocatePrefetchStepSize )
- AllocatePrefetchStepSize = cache_line_size;
- if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
- AllocatePrefetchLines = 3; // Optimistic value
- assert(AllocatePrefetchLines > 0, "invalid value");
- if( AllocatePrefetchLines < 1 ) // set valid value in product VM
- AllocatePrefetchLines = 1; // Conservative value
-
- AllocatePrefetchDistance = allocate_prefetch_distance();
- AllocatePrefetchStyle = allocate_prefetch_style();
-
- if( AllocatePrefetchStyle == 2 && is_intel() &&
- cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
- AllocatePrefetchDistance = 320;
- }
- assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
-
-#ifndef PRODUCT
- if (PrintMiscellaneous && Verbose) {
- tty->print_cr("Logical CPUs per core: %u",
- logical_processors_per_package());
- tty->print_cr("UseSSE=%d",UseSSE);
- tty->print("Allocation: ");
- if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) {
- tty->print_cr("no prefetching");
- } else {
- if (UseSSE == 0 && supports_3dnow()) {
- tty->print("PREFETCHW");
- } else if (UseSSE >= 1) {
- if (AllocatePrefetchInstr == 0) {
- tty->print("PREFETCHNTA");
- } else if (AllocatePrefetchInstr == 1) {
- tty->print("PREFETCHT0");
- } else if (AllocatePrefetchInstr == 2) {
- tty->print("PREFETCHT2");
- } else if (AllocatePrefetchInstr == 3) {
- tty->print("PREFETCHW");
- }
- }
- if (AllocatePrefetchLines > 1) {
- tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
- } else {
- tty->print_cr(" %d, one line", AllocatePrefetchDistance);
- }
- }
- }
-#endif // !PRODUCT
-}
-
-void VM_Version::initialize() {
- ResourceMark rm;
- // Making this stub must be FIRST use of assembler
-
- stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
- if (stub_blob == NULL) {
- vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
- }
- CodeBuffer c(stub_blob->instructions_begin(),
- stub_blob->instructions_size());
- VM_Version_StubGenerator g(&c);
- getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
- g.generate_getPsrInfo());
-
- get_processor_features();
-}
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp Fri Feb 20 11:12:26 2009 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,439 +0,0 @@
-/*
- * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-class VM_Version: public Abstract_VM_Version {
-public:
- // cpuid result register layouts. These are all unions of a uint32_t
- // (in case anyone wants access to the register as a whole) and a bitfield.
-
- union StdCpuid1Eax {
- uint32_t value;
- struct {
- uint32_t stepping : 4,
- model : 4,
- family : 4,
- proc_type : 2,
- : 2,
- ext_model : 4,
- ext_family : 8,
- : 4;
- } bits;
- };
-
- union StdCpuid1Ebx { // example, unused
- uint32_t value;
- struct {
- uint32_t brand_id : 8,
- clflush_size : 8,
- threads_per_cpu : 8,
- apic_id : 8;
- } bits;
- };
-
- union StdCpuid1Ecx {
- uint32_t value;
- struct {
- uint32_t sse3 : 1,
- : 2,
- monitor : 1,
- : 1,
- vmx : 1,
- : 1,
- est : 1,
- : 1,
- ssse3 : 1,
- cid : 1,
- : 2,
- cmpxchg16: 1,
- : 4,
- dca : 1,
- sse4_1 : 1,
- sse4_2 : 1,
- : 11;
- } bits;
- };
-
- union StdCpuid1Edx {
- uint32_t value;
- struct {
- uint32_t : 4,
- tsc : 1,
- : 3,
- cmpxchg8 : 1,
- : 6,
- cmov : 1,
- : 7,
- mmx : 1,
- fxsr : 1,
- sse : 1,
- sse2 : 1,
- : 1,
- ht : 1,
- : 3;
- } bits;
- };
-
- union DcpCpuid4Eax {
- uint32_t value;
- struct {
- uint32_t cache_type : 5,
- : 21,
- cores_per_cpu : 6;
- } bits;
- };
-
- union DcpCpuid4Ebx {
- uint32_t value;
- struct {
- uint32_t L1_line_size : 12,
- partitions : 10,
- associativity : 10;
- } bits;
- };
-
- union ExtCpuid1Ecx {
- uint32_t value;
- struct {
- uint32_t LahfSahf : 1,
- CmpLegacy : 1,
- : 4,
- abm : 1,
- sse4a : 1,
- misalignsse : 1,
- prefetchw : 1,
- : 22;
- } bits;
- };
-
- union ExtCpuid1Edx {
- uint32_t value;
- struct {
- uint32_t : 22,
- mmx_amd : 1,
- mmx : 1,
- fxsr : 1,
- : 4,
- long_mode : 1,
- tdnow2 : 1,
- tdnow : 1;
- } bits;
- };
-
- union ExtCpuid5Ex {
- uint32_t value;
- struct {
- uint32_t L1_line_size : 8,
- L1_tag_lines : 8,
- L1_assoc : 8,
- L1_size : 8;
- } bits;
- };
-
- union ExtCpuid8Ecx {
- uint32_t value;
- struct {
- uint32_t cores_per_cpu : 8,
- : 24;
- } bits;
- };
-
-protected:
- static int _cpu;
- static int _model;
- static int _stepping;
- static int _cpuFeatures; // features returned by the "cpuid" instruction
- // 0 if this instruction is not available
- static const char* _features_str;
-
- enum {
- CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX)
- CPU_CMOV = (1 << 1),
- CPU_FXSR = (1 << 2),
- CPU_HT = (1 << 3),
- CPU_MMX = (1 << 4),
- CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
- CPU_SSE = (1 << 6),
- CPU_SSE2 = (1 << 7),
- CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX)
- CPU_SSSE3= (1 << 9),
- CPU_SSE4A= (1 <<10),
- CPU_SSE4_1 = (1 << 11),
- CPU_SSE4_2 = (1 << 12)
- } cpuFeatureFlags;
-
- // cpuid information block. All info derived from executing cpuid with
- // various function numbers is stored here. Intel and AMD info is
- // merged in this block: accessor methods disentangle it.
- //
- // The info block is laid out in subblocks of 4 dwords corresponding to
- // rax, rbx, rcx and rdx, whether or not they contain anything useful.
- struct CpuidInfo {
- // cpuid function 0
- uint32_t std_max_function;
- uint32_t std_vendor_name_0;
- uint32_t std_vendor_name_1;
- uint32_t std_vendor_name_2;
-
- // cpuid function 1
- StdCpuid1Eax std_cpuid1_rax;
- StdCpuid1Ebx std_cpuid1_rbx;
- StdCpuid1Ecx std_cpuid1_rcx;
- StdCpuid1Edx std_cpuid1_rdx;
-
- // cpuid function 4 (deterministic cache parameters)
- DcpCpuid4Eax dcp_cpuid4_rax;
- DcpCpuid4Ebx dcp_cpuid4_rbx;
- uint32_t dcp_cpuid4_rcx; // unused currently
- uint32_t dcp_cpuid4_rdx; // unused currently
-
- // cpuid function 0x80000000 // example, unused
- uint32_t ext_max_function;
- uint32_t ext_vendor_name_0;
- uint32_t ext_vendor_name_1;
- uint32_t ext_vendor_name_2;
-
- // cpuid function 0x80000001
- uint32_t ext_cpuid1_rax; // reserved
- uint32_t ext_cpuid1_rbx; // reserved
- ExtCpuid1Ecx ext_cpuid1_rcx;
- ExtCpuid1Edx ext_cpuid1_rdx;
-
- // cpuid functions 0x80000002 thru 0x80000004: example, unused
- uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
- uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
- uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
-
- // cpuid function 0x80000005 //AMD L1, Intel reserved
- uint32_t ext_cpuid5_rax; // unused currently
- uint32_t ext_cpuid5_rbx; // reserved
- ExtCpuid5Ex ext_cpuid5_rcx; // L1 data cache info (AMD)
- ExtCpuid5Ex ext_cpuid5_rdx; // L1 instruction cache info (AMD)
-
- // cpuid function 0x80000008
- uint32_t ext_cpuid8_rax; // unused currently
- uint32_t ext_cpuid8_rbx; // reserved
- ExtCpuid8Ecx ext_cpuid8_rcx;
- uint32_t ext_cpuid8_rdx; // reserved
- };
-
- // The actual cpuid info block
- static CpuidInfo _cpuid_info;
-
- // Extractors and predicates
- static uint32_t extended_cpu_family() {
- uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
- result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
- return result;
- }
- static uint32_t extended_cpu_model() {
- uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
- result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
- return result;
- }
- static uint32_t cpu_stepping() {
- uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping;
- return result;
- }
- static uint logical_processor_count() {
- uint result = threads_per_core();
- return result;
- }
- static uint32_t feature_flags() {
- uint32_t result = 0;
- if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0)
- result |= CPU_CX8;
- if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0)
- result |= CPU_CMOV;
- if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() &&
- _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0)
- result |= CPU_FXSR;
- // HT flag is set for multi-core processors also.
- if (threads_per_core() > 1)
- result |= CPU_HT;
- if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() &&
- _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0)
- result |= CPU_MMX;
- if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0)
- result |= CPU_3DNOW;
- if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0)
- result |= CPU_SSE;
- if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0)
- result |= CPU_SSE2;
- if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0)
- result |= CPU_SSE3;
- if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0)
- result |= CPU_SSSE3;
- if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
- result |= CPU_SSE4A;
- if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
- result |= CPU_SSE4_1;
- if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
- result |= CPU_SSE4_2;
- return result;
- }
-
- static void get_processor_features();
-
-public:
- // Offsets for cpuid asm stub
- static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
- static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); }
- static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); }
- static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); }
- static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); }
- static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); }
-
- // Initialization
- static void initialize();
-
- // Asserts
- static void assert_is_initialized() {
- assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized");
- }
-
- //
- // Processor family:
- // 3 - 386
- // 4 - 486
- // 5 - Pentium
- // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
- // Pentium M, Core Solo, Core Duo, Core2 Duo
- // family 6 model: 9, 13, 14, 15
- // 0x0f - Pentium 4, Opteron
- //
- // Note: The cpu family should be used to select between
- // instruction sequences which are valid on all Intel
- // processors. Use the feature test functions below to
- // determine whether a particular instruction is supported.
- //
- static int cpu_family() { return _cpu;}
- static bool is_P6() { return cpu_family() >= 6; }
-
- static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
- static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
-
- static uint cores_per_cpu() {
- uint result = 1;
- if (is_intel()) {
- result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1);
- } else if (is_amd()) {
- result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1);
- }
- return result;
- }
-
- static uint threads_per_core() {
- uint result = 1;
- if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) {
- result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu /
- cores_per_cpu();
- }
- return result;
- }
-
- static intx L1_data_cache_line_size() {
- intx result = 0;
- if (is_intel()) {
- result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1);
- } else if (is_amd()) {
- result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size;
- }
- if (result < 32) // not defined ?
- result = 32; // 32 bytes by default on x86
- return result;
- }
-
- //
- // Feature identification
- //
- static bool supports_cpuid() { return _cpuFeatures != 0; }
- static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
- static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; }
- static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; }
- static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; }
- static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; }
- static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; }
- static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; }
- static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; }
- static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
- static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
- static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
- //
- // AMD features
- //
- static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; }
- static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; }
- static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; }
- static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; }
-
- static bool supports_compare_and_exchange() { return true; }
-
- static const char* cpu_features() { return _features_str; }
-
- static intx allocate_prefetch_distance() {
- // This method should be called before allocate_prefetch_style().
- //
- // Hardware prefetching (distance/size in bytes):
- // Pentium 3 - 64 / 32
- // Pentium 4 - 256 / 128
- // Athlon - 64 / 32 ????
- // Opteron - 128 / 64 only when 2 sequential cache lines accessed
- // Core - 128 / 64
- //
- // Software prefetching (distance in bytes / instruction with best score):
- // Pentium 3 - 128 / prefetchnta
- // Pentium 4 - 512 / prefetchnta
- // Athlon - 128 / prefetchnta
- // Opteron - 256 / prefetchnta
- // Core - 256 / prefetchnta
- // It will be used only when AllocatePrefetchStyle > 0
-
- intx count = AllocatePrefetchDistance;
- if (count < 0) { // default ?
- if (is_amd()) { // AMD
- if (supports_sse2())
- count = 256; // Opteron
- else
- count = 128; // Athlon
- } else { // Intel
- if (supports_sse2())
- if (cpu_family() == 6) {
- count = 256; // Pentium M, Core, Core2
- } else {
- count = 512; // Pentium 4
- }
- else
- count = 128; // Pentium 3 (and all other old CPUs)
- }
- }
- return count;
- }
- static intx allocate_prefetch_style() {
- assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
- // Return 0 if AllocatePrefetchDistance was not defined or
- // prefetch instruction is not supported.
- return (AllocatePrefetchDistance > 0 &&
- (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0;
- }
-};
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp Fri Feb 20 11:12:26 2009 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,419 +0,0 @@
-/*
- * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-# include "incls/_precompiled.incl"
-# include "incls/_vm_version_x86_64.cpp.incl"
-
-int VM_Version::_cpu;
-int VM_Version::_model;
-int VM_Version::_stepping;
-int VM_Version::_cpuFeatures;
-const char* VM_Version::_features_str = "";
-VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
-
-static BufferBlob* stub_blob;
-static const int stub_size = 300;
-
-extern "C" {
- typedef void (*getPsrInfo_stub_t)(void*);
-}
-static getPsrInfo_stub_t getPsrInfo_stub = NULL;
-
-
-class VM_Version_StubGenerator: public StubCodeGenerator {
- public:
-
- VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
-
- address generate_getPsrInfo() {
-
- Label std_cpuid1, ext_cpuid1, ext_cpuid5, done;
-
- StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
-# define __ _masm->
-
- address start = __ pc();
-
- //
- // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
- //
- // rcx and rdx are first and second argument registers on windows
-
- __ push(rbp);
- __ mov(rbp, c_rarg0); // cpuid_info address
- __ push(rbx);
- __ push(rsi);
-
- //
- // we have a chip which supports the "cpuid" instruction
- //
- __ xorl(rax, rax);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- __ cmpl(rax, 3); // Is cpuid(0x4) supported?
- __ jccb(Assembler::belowEqual, std_cpuid1);
-
- //
- // cpuid(0x4) Deterministic cache params
- //
- __ movl(rax, 4);
- __ xorl(rcx, rcx); // L1 cache
- __ cpuid();
- __ push(rax);
- __ andl(rax, 0x1f); // Determine if valid cache parameters used
- __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache
- __ pop(rax);
- __ jccb(Assembler::equal, std_cpuid1);
-
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // Standard cpuid(0x1)
- //
- __ bind(std_cpuid1);
- __ movl(rax, 1);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- __ movl(rax, 0x80000000);
- __ cpuid();
- __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
- __ jcc(Assembler::belowEqual, done);
- __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
- __ jccb(Assembler::belowEqual, ext_cpuid1);
- __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
- __ jccb(Assembler::belowEqual, ext_cpuid5);
- //
- // Extended cpuid(0x80000008)
- //
- __ movl(rax, 0x80000008);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // Extended cpuid(0x80000005)
- //
- __ bind(ext_cpuid5);
- __ movl(rax, 0x80000005);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // Extended cpuid(0x80000001)
- //
- __ bind(ext_cpuid1);
- __ movl(rax, 0x80000001);
- __ cpuid();
- __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
- __ movl(Address(rsi, 0), rax);
- __ movl(Address(rsi, 4), rbx);
- __ movl(Address(rsi, 8), rcx);
- __ movl(Address(rsi,12), rdx);
-
- //
- // return
- //
- __ bind(done);
- __ pop(rsi);
- __ pop(rbx);
- __ pop(rbp);
- __ ret(0);
-
-# undef __
-
- return start;
- };
-};
-
-
-void VM_Version::get_processor_features() {
-
- _logical_processors_per_package = 1;
- // Get raw processor info
- getPsrInfo_stub(&_cpuid_info);
- assert_is_initialized();
- _cpu = extended_cpu_family();
- _model = extended_cpu_model();
- _stepping = cpu_stepping();
- _cpuFeatures = feature_flags();
- // Logical processors are only available on P4s and above,
- // and only if hyperthreading is available.
- _logical_processors_per_package = logical_processor_count();
- _supports_cx8 = supports_cmpxchg8();
- // OS should support SSE for x64 and hardware should support at least SSE2.
- if (!VM_Version::supports_sse2()) {
- vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
- }
- if (UseSSE < 4) {
- _cpuFeatures &= ~CPU_SSE4_1;
- _cpuFeatures &= ~CPU_SSE4_2;
- }
- if (UseSSE < 3) {
- _cpuFeatures &= ~CPU_SSE3;
- _cpuFeatures &= ~CPU_SSSE3;
- _cpuFeatures &= ~CPU_SSE4A;
- }
- if (UseSSE < 2)
- _cpuFeatures &= ~CPU_SSE2;
- if (UseSSE < 1)
- _cpuFeatures &= ~CPU_SSE;
-
- if (logical_processors_per_package() == 1) {
- // HT processor could be installed on a system which doesn't support HT.
- _cpuFeatures &= ~CPU_HT;
- }
-
- char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
- cores_per_cpu(), threads_per_core(),
- cpu_family(), _model, _stepping,
- (supports_cmov() ? ", cmov" : ""),
- (supports_cmpxchg8() ? ", cx8" : ""),
- (supports_fxsr() ? ", fxsr" : ""),
- (supports_mmx() ? ", mmx" : ""),
- (supports_sse() ? ", sse" : ""),
- (supports_sse2() ? ", sse2" : ""),
- (supports_sse3() ? ", sse3" : ""),
- (supports_ssse3()? ", ssse3": ""),
- (supports_sse4_1() ? ", sse4.1" : ""),
- (supports_sse4_2() ? ", sse4.2" : ""),
- (supports_mmx_ext() ? ", mmxext" : ""),
- (supports_3dnow() ? ", 3dnow" : ""),
- (supports_3dnow2() ? ", 3dnowext" : ""),
- (supports_sse4a() ? ", sse4a": ""),
- (supports_ht() ? ", ht": ""));
- _features_str = strdup(buf);
-
- // UseSSE is set to the smaller of what hardware supports and what
- // the command line requires. I.e., you cannot set UseSSE to 2 on
- // older Pentiums which do not support it.
- if( UseSSE > 4 ) UseSSE=4;
- if( UseSSE < 0 ) UseSSE=0;
- if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
- UseSSE = MIN2((intx)3,UseSSE);
- if( !supports_sse3() ) // Drop to 2 if no SSE3 support
- UseSSE = MIN2((intx)2,UseSSE);
- if( !supports_sse2() ) // Drop to 1 if no SSE2 support
- UseSSE = MIN2((intx)1,UseSSE);
- if( !supports_sse () ) // Drop to 0 if no SSE support
- UseSSE = 0;
-
- // On new cpus instructions which update whole XMM register should be used
- // to prevent partial register stall due to dependencies on high half.
- //
- // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
- // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
- // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
- // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
-
- if( is_amd() ) { // AMD cpus specific settings
- if( FLAG_IS_DEFAULT(UseAddressNop) ) {
- // Use it on all AMD cpus starting from Opteron (don't need
- // a cpu check since only Opteron and new cpus support 64-bits mode).
- UseAddressNop = true;
- }
- if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
- if( supports_sse4a() ) {
- UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
- } else {
- UseXmmLoadAndClearUpper = false;
- }
- }
- if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
- if( supports_sse4a() ) {
- UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
- } else {
- UseXmmRegToRegMoveAll = false;
- }
- }
- if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
- if( supports_sse4a() ) {
- UseXmmI2F = true;
- } else {
- UseXmmI2F = false;
- }
- }
- if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
- if( supports_sse4a() ) {
- UseXmmI2D = true;
- } else {
- UseXmmI2D = false;
- }
- }
- }
-
- if( is_intel() ) { // Intel cpus specific settings
- if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
- UseStoreImmI16 = false; // don't use it on Intel cpus
- }
- if( FLAG_IS_DEFAULT(UseAddressNop) ) {
- // Use it on all Intel cpus starting from PentiumPro
- // (don't need a cpu check since only new cpus support 64-bits mode).
- UseAddressNop = true;
- }
- if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
- UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
- }
- if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
- if( supports_sse3() ) {
- UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
- } else {
- UseXmmRegToRegMoveAll = false;
- }
- }
- if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
-#ifdef COMPILER2
- if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
- // For new Intel cpus do the next optimization:
- // don't align the beginning of a loop if there are enough instructions
- // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
- // in current fetch line (OptoLoopAlignment) or the padding
- // is big (> MaxLoopPad).
- // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
- // generated NOP instructions. 11 is the largest size of one
- // address NOP instruction '0F 1F' (see Assembler::nop(i)).
- MaxLoopPad = 11;
- }
-#endif // COMPILER2
- if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
- UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
- }
- if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
- if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
- UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
- }
- }
- }
- }
-
- assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
- assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
-
- // set valid Prefetch instruction
- if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
- if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
- if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
-
- if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
- if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
- if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
-
- // Allocation prefetch settings
- intx cache_line_size = L1_data_cache_line_size();
- if( cache_line_size > AllocatePrefetchStepSize )
- AllocatePrefetchStepSize = cache_line_size;
- if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
- AllocatePrefetchLines = 3; // Optimistic value
- assert(AllocatePrefetchLines > 0, "invalid value");
- if( AllocatePrefetchLines < 1 ) // set valid value in product VM
- AllocatePrefetchLines = 1; // Conservative value
-
- AllocatePrefetchDistance = allocate_prefetch_distance();
- AllocatePrefetchStyle = allocate_prefetch_style();
-
- if( AllocatePrefetchStyle == 2 && is_intel() &&
- cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
- AllocatePrefetchDistance = 384;
- }
- assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
-
- // Prefetch settings
- PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
- PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
- PrefetchFieldsAhead = prefetch_fields_ahead();
-
-#ifndef PRODUCT
- if (PrintMiscellaneous && Verbose) {
- tty->print_cr("Logical CPUs per core: %u",
- logical_processors_per_package());
- tty->print_cr("UseSSE=%d",UseSSE);
- tty->print("Allocation: ");
- if (AllocatePrefetchStyle <= 0) {
- tty->print_cr("no prefetching");
- } else {
- if (AllocatePrefetchInstr == 0) {
- tty->print("PREFETCHNTA");
- } else if (AllocatePrefetchInstr == 1) {
- tty->print("PREFETCHT0");
- } else if (AllocatePrefetchInstr == 2) {
- tty->print("PREFETCHT2");
- } else if (AllocatePrefetchInstr == 3) {
- tty->print("PREFETCHW");
- }
- if (AllocatePrefetchLines > 1) {
- tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
- } else {
- tty->print_cr(" %d, one line", AllocatePrefetchDistance);
- }
- }
- if (PrefetchCopyIntervalInBytes > 0) {
- tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
- }
- if (PrefetchScanIntervalInBytes > 0) {
- tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
- }
- if (PrefetchFieldsAhead > 0) {
- tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
- }
- }
-#endif // !PRODUCT
-}
-
-void VM_Version::initialize() {
- ResourceMark rm;
- // Making this stub must be FIRST use of assembler
-
- stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
- if (stub_blob == NULL) {
- vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
- }
- CodeBuffer c(stub_blob->instructions_begin(),
- stub_blob->instructions_size());
- VM_Version_StubGenerator g(&c);
- getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
- g.generate_getPsrInfo());
-
- get_processor_features();
-}
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp Fri Feb 20 11:12:26 2009 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,449 +0,0 @@
-/*
- * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-class VM_Version : public Abstract_VM_Version {
-public:
- // cpuid result register layouts. These are all unions of a uint32_t
- // (in case anyone wants access to the register as a whole) and a bitfield.
-
- union StdCpuid1Eax {
- uint32_t value;
- struct {
- uint32_t stepping : 4,
- model : 4,
- family : 4,
- proc_type : 2,
- : 2,
- ext_model : 4,
- ext_family : 8,
- : 4;
- } bits;
- };
-
- union StdCpuid1Ebx { // example, unused
- uint32_t value;
- struct {
- uint32_t brand_id : 8,
- clflush_size : 8,
- threads_per_cpu : 8,
- apic_id : 8;
- } bits;
- };
-
- union StdCpuid1Ecx {
- uint32_t value;
- struct {
- uint32_t sse3 : 1,
- : 2,
- monitor : 1,
- : 1,
- vmx : 1,
- : 1,
- est : 1,
- : 1,
- ssse3 : 1,
- cid : 1,
- : 2,
- cmpxchg16: 1,
- : 4,
- dca : 1,
- sse4_1 : 1,
- sse4_2 : 1,
- : 11;
- } bits;
- };
-
- union StdCpuid1Edx {
- uint32_t value;
- struct {
- uint32_t : 4,
- tsc : 1,
- : 3,
- cmpxchg8 : 1,
- : 6,
- cmov : 1,
- : 7,
- mmx : 1,
- fxsr : 1,
- sse : 1,
- sse2 : 1,
- : 1,
- ht : 1,
- : 3;
- } bits;
- };
-
- union DcpCpuid4Eax {
- uint32_t value;
- struct {
- uint32_t cache_type : 5,
- : 21,
- cores_per_cpu : 6;
- } bits;
- };
-
- union DcpCpuid4Ebx {
- uint32_t value;
- struct {
- uint32_t L1_line_size : 12,
- partitions : 10,
- associativity : 10;
- } bits;
- };
-
- union ExtCpuid1Edx {
- uint32_t value;
- struct {
- uint32_t : 22,
- mmx_amd : 1,
- mmx : 1,
- fxsr : 1,
- : 4,
- long_mode : 1,
- tdnow2 : 1,
- tdnow : 1;
- } bits;
- };
-
- union ExtCpuid1Ecx {
- uint32_t value;
- struct {
- uint32_t LahfSahf : 1,
- CmpLegacy : 1,
- : 4,
- abm : 1,
- sse4a : 1,
- misalignsse : 1,
- prefetchw : 1,
- : 22;
- } bits;
- };
-
- union ExtCpuid5Ex {
- uint32_t value;
- struct {
- uint32_t L1_line_size : 8,
- L1_tag_lines : 8,
- L1_assoc : 8,
- L1_size : 8;
- } bits;
- };
-
- union ExtCpuid8Ecx {
- uint32_t value;
- struct {
- uint32_t cores_per_cpu : 8,
- : 24;
- } bits;
- };
-
-protected:
- static int _cpu;
- static int _model;
- static int _stepping;
- static int _cpuFeatures; // features returned by the "cpuid" instruction
- // 0 if this instruction is not available
- static const char* _features_str;
-
- enum {
- CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX)
- CPU_CMOV = (1 << 1),
- CPU_FXSR = (1 << 2),
- CPU_HT = (1 << 3),
- CPU_MMX = (1 << 4),
- CPU_3DNOW= (1 << 5),
- CPU_SSE = (1 << 6),
- CPU_SSE2 = (1 << 7),
- CPU_SSE3 = (1 << 8),
- CPU_SSSE3= (1 << 9),
- CPU_SSE4A= (1 <<10),
- CPU_SSE4_1 = (1 << 11),
- CPU_SSE4_2 = (1 << 12)
- } cpuFeatureFlags;
-
- // cpuid information block. All info derived from executing cpuid with
- // various function numbers is stored here. Intel and AMD info is
- // merged in this block: accessor methods disentangle it.
- //
- // The info block is laid out in subblocks of 4 dwords corresponding to
- // eax, ebx, ecx and edx, whether or not they contain anything useful.
- struct CpuidInfo {
- // cpuid function 0
- uint32_t std_max_function;
- uint32_t std_vendor_name_0;
- uint32_t std_vendor_name_1;
- uint32_t std_vendor_name_2;
-
- // cpuid function 1
- StdCpuid1Eax std_cpuid1_eax;
- StdCpuid1Ebx std_cpuid1_ebx;
- StdCpuid1Ecx std_cpuid1_ecx;
- StdCpuid1Edx std_cpuid1_edx;
-
- // cpuid function 4 (deterministic cache parameters)
- DcpCpuid4Eax dcp_cpuid4_eax;
- DcpCpuid4Ebx dcp_cpuid4_ebx;
- uint32_t dcp_cpuid4_ecx; // unused currently
- uint32_t dcp_cpuid4_edx; // unused currently
-
- // cpuid function 0x80000000 // example, unused
- uint32_t ext_max_function;
- uint32_t ext_vendor_name_0;
- uint32_t ext_vendor_name_1;
- uint32_t ext_vendor_name_2;
-
- // cpuid function 0x80000001
- uint32_t ext_cpuid1_eax; // reserved
- uint32_t ext_cpuid1_ebx; // reserved
- ExtCpuid1Ecx ext_cpuid1_ecx;
- ExtCpuid1Edx ext_cpuid1_edx;
-
- // cpuid functions 0x80000002 thru 0x80000004: example, unused
- uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
- uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
- uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
-
- // cpuid function 0x80000005 //AMD L1, Intel reserved
- uint32_t ext_cpuid5_eax; // unused currently
- uint32_t ext_cpuid5_ebx; // reserved
- ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD)
- ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD)
-
- // cpuid function 0x80000008
- uint32_t ext_cpuid8_eax; // unused currently
- uint32_t ext_cpuid8_ebx; // reserved
- ExtCpuid8Ecx ext_cpuid8_ecx;
- uint32_t ext_cpuid8_edx; // reserved
- };
-
- // The actual cpuid info block
- static CpuidInfo _cpuid_info;
-
- // Extractors and predicates
- static uint32_t extended_cpu_family() {
- uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
- result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
- return result;
- }
- static uint32_t extended_cpu_model() {
- uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
- result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
- return result;
- }
- static uint32_t cpu_stepping() {
- uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
- return result;
- }
- static uint logical_processor_count() {
- uint result = threads_per_core();
- return result;
- }
- static uint32_t feature_flags() {
- uint32_t result = 0;
- if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
- result |= CPU_CX8;
- if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
- result |= CPU_CMOV;
- if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() &&
- _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)
- result |= CPU_FXSR;
- // HT flag is set for multi-core processors also.
- if (threads_per_core() > 1)
- result |= CPU_HT;
- if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
- _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
- result |= CPU_MMX;
- if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
- result |= CPU_3DNOW;
- if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
- result |= CPU_SSE;
- if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
- result |= CPU_SSE2;
- if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
- result |= CPU_SSE3;
- if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
- result |= CPU_SSSE3;
- if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
- result |= CPU_SSE4A;
- if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
- result |= CPU_SSE4_1;
- if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
- result |= CPU_SSE4_2;
- return result;
- }
-
- static void get_processor_features();
-
-public:
- // Offsets for cpuid asm stub
- static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
- static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
- static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
- static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
- static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
- static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
-
- // Initialization
- static void initialize();
-
- // Asserts
- static void assert_is_initialized() {
- assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
- }
-
- //
- // Processor family:
- // 3 - 386
- // 4 - 486
- // 5 - Pentium
- // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
- // Pentium M, Core Solo, Core Duo, Core2 Duo
- // family 6 model: 9, 13, 14, 15
- // 0x0f - Pentium 4, Opteron
- //
- // Note: The cpu family should be used to select between
- // instruction sequences which are valid on all Intel
- // processors. Use the feature test functions below to
- // determine whether a particular instruction is supported.
- //
- static int cpu_family() { return _cpu;}
- static bool is_P6() { return cpu_family() >= 6; }
-
- static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
- static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
-
- static uint cores_per_cpu() {
- uint result = 1;
- if (is_intel()) {
- result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
- } else if (is_amd()) {
- result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
- }
- return result;
- }
-
- static uint threads_per_core() {
- uint result = 1;
- if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
- result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
- cores_per_cpu();
- }
- return result;
- }
-
- static intx L1_data_cache_line_size() {
- intx result = 0;
- if (is_intel()) {
- result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
- } else if (is_amd()) {
- result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
- }
- if (result < 32) // not defined ?
- result = 32; // 32 bytes by default for other x64
- return result;
- }
-
- //
- // Feature identification
- //
- static bool supports_cpuid() { return _cpuFeatures != 0; }
- static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
- static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; }
- static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; }
- static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; }
- static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; }
- static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; }
- static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; }
- static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; }
- static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
- static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
- static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
- //
- // AMD features
- //
- static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; }
- static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
- static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
- static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; }
-
- static bool supports_compare_and_exchange() { return true; }
-
- static const char* cpu_features() { return _features_str; }
-
- static intx allocate_prefetch_distance() {
- // This method should be called before allocate_prefetch_style().
- //
- // Hardware prefetching (distance/size in bytes):
- // Pentium 4 - 256 / 128
- // Opteron - 128 / 64 only when 2 sequential cache lines accessed
- // Core - 128 / 64
- //
- // Software prefetching (distance in bytes / instruction with best score):
- // Pentium 4 - 512 / prefetchnta
- // Opteron - 256 / prefetchnta
- // Core - 256 / prefetchnta
- // It will be used only when AllocatePrefetchStyle > 0
-
- intx count = AllocatePrefetchDistance;
- if (count < 0) { // default ?
- if (is_amd()) { // AMD
- count = 256; // Opteron
- } else { // Intel
- if (cpu_family() == 6) {
- count = 256;// Pentium M, Core, Core2
- } else {
- count = 512;// Pentium 4
- }
- }
- }
- return count;
- }
- static intx allocate_prefetch_style() {
- assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
- // Return 0 if AllocatePrefetchDistance was not defined.
- return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
- }
-
- // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
- // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
- // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
- // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
-
- // gc copy/scan is disabled if prefetchw isn't supported, because
- // Prefetch::write emits an inlined prefetchw on Linux.
- // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
- // The used prefetcht0 instruction works for both amd64 and em64t.
- static intx prefetch_copy_interval_in_bytes() {
- intx interval = PrefetchCopyIntervalInBytes;
- return interval >= 0 ? interval : 576;
- }
- static intx prefetch_scan_interval_in_bytes() {
- intx interval = PrefetchScanIntervalInBytes;
- return interval >= 0 ? interval : 576;
- }
- static intx prefetch_fields_ahead() {
- intx count = PrefetchFieldsAhead;
- return count >= 0 ? count : 1;
- }
-};
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Fri Feb 20 11:12:26 2009 -0800
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Tue Feb 24 09:53:20 2009 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -299,14 +299,18 @@
}
+#endif // AMD64
+
bool os::supports_sse() {
+#ifdef AMD64
+ return true;
+#else
if (sse_status == SSE_UNKNOWN)
check_for_sse_support();
return sse_status == SSE_SUPPORTED;
+#endif // AMD64
}
-#endif // AMD64
-
bool os::is_allocatable(size_t bytes) {
#ifdef AMD64
return true;
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Fri Feb 20 11:12:26 2009 -0800
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Tue Feb 24 09:53:20 2009 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright 1999-2004 Sun Microsystems, Inc. All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,8 +41,9 @@
static void fence_bootstrap ();
static void setup_fpu();
+#endif // AMD64
+
static bool supports_sse();
-#endif // AMD64
static bool is_allocatable(size_t bytes);
--- a/hotspot/src/share/vm/includeDB_core Fri Feb 20 11:12:26 2009 -0800
+++ b/hotspot/src/share/vm/includeDB_core Tue Feb 24 09:53:20 2009 -0800
@@ -1,5 +1,5 @@
//
-// Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved.
+// Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -176,7 +176,7 @@
arguments.cpp oop.inline.hpp
arguments.cpp os_<os_family>.inline.hpp
arguments.cpp universe.inline.hpp
-arguments.cpp vm_version_<arch_model>.hpp
+arguments.cpp vm_version_<arch>.hpp
arguments.hpp java.hpp
arguments.hpp perfData.hpp
@@ -241,7 +241,7 @@
assembler.hpp register_<arch>.hpp
assembler.hpp relocInfo.hpp
assembler.hpp top.hpp
-assembler.hpp vm_version_<arch_model>.hpp
+assembler.hpp vm_version_<arch>.hpp
assembler.inline.hpp assembler.hpp
assembler.inline.hpp codeBuffer.hpp
@@ -280,7 +280,7 @@
atomic_<os_arch>.inline.hpp atomic.hpp
atomic_<os_arch>.inline.hpp os.hpp
-atomic_<os_arch>.inline.hpp vm_version_<arch_model>.hpp
+atomic_<os_arch>.inline.hpp vm_version_<arch>.hpp
// attachListener is jck optional, put cpp deps in includeDB_features
@@ -2176,7 +2176,7 @@
interpreterRuntime.cpp threadCritical.hpp
interpreterRuntime.cpp universe.inline.hpp
interpreterRuntime.cpp vmSymbols.hpp
-interpreterRuntime.cpp vm_version_<arch_model>.hpp
+interpreterRuntime.cpp vm_version_<arch>.hpp
interpreterRuntime.hpp bytecode.hpp
interpreterRuntime.hpp frame.inline.hpp
@@ -2279,7 +2279,7 @@
java.cpp universe.hpp
java.cpp vmError.hpp
java.cpp vm_operations.hpp
-java.cpp vm_version_<arch_model>.hpp
+java.cpp vm_version_<arch>.hpp
java.cpp vtune.hpp
java.hpp os.hpp
@@ -3485,7 +3485,7 @@
register_<arch>.cpp register_<arch>.hpp
register_<arch>.hpp register.hpp
-register_<arch>.hpp vm_version_<arch_model>.hpp
+register_<arch>.hpp vm_version_<arch>.hpp
registerMap.hpp globalDefinitions.hpp
registerMap.hpp register_<arch>.hpp
@@ -3835,7 +3835,7 @@
statSampler.cpp statSampler.hpp
statSampler.cpp systemDictionary.hpp
statSampler.cpp vmSymbols.hpp
-statSampler.cpp vm_version_<arch_model>.hpp
+statSampler.cpp vm_version_<arch>.hpp
statSampler.hpp perfData.hpp
statSampler.hpp task.hpp
@@ -4579,22 +4579,22 @@
vm_version.cpp arguments.hpp
vm_version.cpp oop.inline.hpp
vm_version.cpp universe.hpp
-vm_version.cpp vm_version_<arch_model>.hpp
+vm_version.cpp vm_version_<arch>.hpp
vm_version.hpp allocation.hpp
vm_version.hpp ostream.hpp
-vm_version_<arch_model>.cpp assembler_<arch>.inline.hpp
-vm_version_<arch_model>.cpp java.hpp
-vm_version_<arch_model>.cpp os_<os_family>.inline.hpp
-vm_version_<arch_model>.cpp resourceArea.hpp
-vm_version_<arch_model>.cpp stubCodeGenerator.hpp
-vm_version_<arch_model>.cpp vm_version_<arch_model>.hpp
-
-vm_version_<arch_model>.hpp globals_extension.hpp
-vm_version_<arch_model>.hpp vm_version.hpp
-
-vm_version_<os_arch>.cpp vm_version_<arch_model>.hpp
+vm_version_<arch>.cpp assembler_<arch>.inline.hpp
+vm_version_<arch>.cpp java.hpp
+vm_version_<arch>.cpp os_<os_family>.inline.hpp
+vm_version_<arch>.cpp resourceArea.hpp
+vm_version_<arch>.cpp stubCodeGenerator.hpp
+vm_version_<arch>.cpp vm_version_<arch>.hpp
+
+vm_version_<arch>.hpp globals_extension.hpp
+vm_version_<arch>.hpp vm_version.hpp
+
+vm_version_<os_arch>.cpp vm_version_<arch>.hpp
vmreg.cpp assembler.hpp
vmreg.cpp vmreg.hpp
--- a/hotspot/src/share/vm/opto/escape.cpp Fri Feb 20 11:12:26 2009 -0800
+++ b/hotspot/src/share/vm/opto/escape.cpp Tue Feb 24 09:53:20 2009 -0800
@@ -756,6 +756,16 @@
} else {
break;
}
+ } else if (result->Opcode() == Op_SCMemProj) {
+ assert(result->in(0)->is_LoadStore(), "sanity");
+ const Type *at = phase->type(result->in(0)->in(MemNode::Address));
+ if (at != Type::TOP) {
+ assert (at->isa_ptr() != NULL, "pointer type required.");
+ int idx = C->get_alias_index(at->is_ptr());
+ assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node access its field");
+ break;
+ }
+ result = result->in(0)->in(MemNode::Memory);
}
}
if (result->is_Phi()) {
--- a/hotspot/src/share/vm/opto/macro.cpp Fri Feb 20 11:12:26 2009 -0800
+++ b/hotspot/src/share/vm/opto/macro.cpp Tue Feb 24 09:53:20 2009 -0800
@@ -250,6 +250,15 @@
assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw");
}
mem = mem->in(MemNode::Memory);
+ } else if (mem->Opcode() == Op_SCMemProj) {
+ assert(mem->in(0)->is_LoadStore(), "sanity");
+ const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr();
+ int adr_idx = Compile::current()->get_alias_index(atype);
+ if (adr_idx == alias_idx) {
+ assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
+ return NULL;
+ }
+ mem = mem->in(0)->in(MemNode::Memory);
} else {
return mem;
}
@@ -329,8 +338,15 @@
return NULL;
}
values.at_put(j, val);
+ } else if (val->Opcode() == Op_SCMemProj) {
+ assert(val->in(0)->is_LoadStore(), "sanity");
+ assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
+ return NULL;
} else {
+#ifdef ASSERT
+ val->dump();
assert(false, "unknown node on this path");
+#endif
return NULL; // unknown node on this path
}
}
--- a/hotspot/src/share/vm/opto/matcher.cpp Fri Feb 20 11:12:26 2009 -0800
+++ b/hotspot/src/share/vm/opto/matcher.cpp Tue Feb 24 09:53:20 2009 -0800
@@ -1707,11 +1707,18 @@
void Matcher::find_shared( Node *n ) {
// Allocate stack of size C->unique() * 2 to avoid frequent realloc
MStack mstack(C->unique() * 2);
+ // Mark nodes as address_visited if they are inputs to an address expression
+ VectorSet address_visited(Thread::current()->resource_area());
mstack.push(n, Visit); // Don't need to pre-visit root node
while (mstack.is_nonempty()) {
n = mstack.node(); // Leave node on stack
Node_State nstate = mstack.state();
+ uint nop = n->Opcode();
if (nstate == Pre_Visit) {
+ if (address_visited.test(n->_idx)) { // Visited in address already?
+ // Flag as visited and shared now.
+ set_visited(n);
+ }
if (is_visited(n)) { // Visited already?
// Node is shared and has no reason to clone. Flag it as shared.
// This causes it to match into a register for the sharing.
@@ -1726,7 +1733,7 @@
set_visited(n); // Flag as visited now
bool mem_op = false;
- switch( n->Opcode() ) { // Handle some opcodes special
+ switch( nop ) { // Handle some opcodes special
case Op_Phi: // Treat Phis as shared roots
case Op_Parm:
case Op_Proj: // All handled specially during matching
@@ -1887,34 +1894,51 @@
// to have a single use so force sharing here.
set_shared(m->in(AddPNode::Base)->in(1));
}
+
+ // Some inputs of an address expression are not pushed on the stack,
+ // to avoid marking them as shared and forcing them into a register
+ // when they are used only in address expressions.
+ // But they must still be marked as shared if they have other uses
+ // besides address expressions.
+
Node *off = m->in(AddPNode::Offset);
- if( off->is_Con() ) {
- set_visited(m); // Flag as visited now
+ if( off->is_Con() &&
+ // When there are other uses besides address expressions
+ // put it on stack and mark as shared.
+ !is_visited(m) ) {
+ address_visited.test_set(m->_idx); // Flag as address_visited
Node *adr = m->in(AddPNode::Address);
// Intel, ARM and friends can handle 2 adds in addressing mode
if( clone_shift_expressions && adr->is_AddP() &&
// AtomicAdd is not an addressing expression.
// Cheap to find it by looking for screwy base.
- !adr->in(AddPNode::Base)->is_top() ) {
- set_visited(adr); // Flag as visited now
+ !adr->in(AddPNode::Base)->is_top() &&
+ // Are there other uses besides address expressions?
+ !is_visited(adr) ) {
+ address_visited.set(adr->_idx); // Flag as address_visited
Node *shift = adr->in(AddPNode::Offset);
// Check for shift by small constant as well
if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
- shift->in(2)->get_int() <= 3 ) {
- set_visited(shift); // Flag as visited now
+ shift->in(2)->get_int() <= 3 &&
+ // Are there other uses besides address expressions?
+ !is_visited(shift) ) {
+ address_visited.set(shift->_idx); // Flag as address_visited
mstack.push(shift->in(2), Visit);
+ Node *conv = shift->in(1);
#ifdef _LP64
// Allow Matcher to match the rule which bypass
// ConvI2L operation for an array index on LP64
// if the index value is positive.
- if( shift->in(1)->Opcode() == Op_ConvI2L &&
- shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) {
- set_visited(shift->in(1)); // Flag as visited now
- mstack.push(shift->in(1)->in(1), Pre_Visit);
+ if( conv->Opcode() == Op_ConvI2L &&
+ conv->as_Type()->type()->is_long()->_lo >= 0 &&
+ // Are there other uses besides address expressions?
+ !is_visited(conv) ) {
+ address_visited.set(conv->_idx); // Flag as address_visited
+ mstack.push(conv->in(1), Pre_Visit);
} else
#endif
- mstack.push(shift->in(1), Pre_Visit);
+ mstack.push(conv, Pre_Visit);
} else {
mstack.push(shift, Pre_Visit);
}
--- a/hotspot/src/share/vm/opto/memnode.cpp Fri Feb 20 11:12:26 2009 -0800
+++ b/hotspot/src/share/vm/opto/memnode.cpp Tue Feb 24 09:53:20 2009 -0800
@@ -1066,11 +1066,11 @@
break;
}
}
- LoadNode* load = NULL;
- if (allocation != NULL && base->in(load_index)->is_Load()) {
- load = base->in(load_index)->as_Load();
- }
- if (load != NULL && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) {
+ bool has_load = ( allocation != NULL &&
+ (base->in(load_index)->is_Load() ||
+ base->in(load_index)->is_DecodeN() &&
+ base->in(load_index)->in(1)->is_Load()) );
+ if (has_load && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) {
// Push the loads from the phi that comes from valueOf up
// through it to allow elimination of the loads and the recovery
// of the original value.
@@ -1106,11 +1106,20 @@
result->set_req(load_index, in2);
return result;
}
- } else if (base->is_Load()) {
+ } else if (base->is_Load() ||
+ base->is_DecodeN() && base->in(1)->is_Load()) {
+ if (base->is_DecodeN()) {
+ // Get LoadN node which loads cached Integer object
+ base = base->in(1);
+ }
// Eliminate the load of Integer.value for integers from the cache
// array by deriving the value from the index into the array.
// Capture the offset of the load and then reverse the computation.
Node* load_base = base->in(Address)->in(AddPNode::Base);
+ if (load_base != NULL && load_base->is_DecodeN()) {
+ // load_base may be NULL (it is checked just below), so guard before unwrapping DecodeN to the LoadN of IntegerCache.cache
+ load_base = load_base->in(1);
+ }
if (load_base != NULL) {
Compile::AliasType* atp = phase->C->alias_type(load_base->adr_type());
intptr_t cache_offset;