8187219: Newer AMD 17h (EPYC) Processor family defaults
author kvn
Tue, 03 Oct 2017 15:32:27 -0700
changeset 47582 fde01e0fccb4
parent 47581 c5057bf6617f
child 47583 2dcbf51d0431
8187219: Newer AMD 17h (EPYC) Processor family defaults
Reviewed-by: dholmes, kvn
Contributed-by: rohitarulraj@gmail.com
src/hotspot/cpu/x86/vm_version_x86.cpp
src/hotspot/cpu/x86/vm_version_x86.hpp
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Tue Oct 03 21:21:35 2017 +0000
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Tue Oct 03 15:32:27 2017 -0700
@@ -46,7 +46,7 @@
 address VM_Version::_cpuinfo_cont_addr = 0;
 
 static BufferBlob* stub_blob;
-static const int stub_size = 1000;
+static const int stub_size = 1100;
 
 extern "C" {
   typedef void (*get_cpu_info_stub_t)(void*);
@@ -70,7 +70,7 @@
     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 
     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
-    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
+    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 
     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -267,14 +267,30 @@
     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
     __ jcc(Assembler::belowEqual, done);
     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid1);
+    __ jcc(Assembler::belowEqual, ext_cpuid1);
     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid5);
     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid7);
+    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid8);
+    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
+    __ jccb(Assembler::below, ext_cpuid8);
+    //
+    // Extended cpuid(0x8000001E)
+    //
+    __ movl(rax, 0x8000001E);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
     //
     // Extended cpuid(0x80000008)
     //
+    __ bind(ext_cpuid8);
     __ movl(rax, 0x80000008);
     __ cpuid();
     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1125,27 @@
     }
 
 #ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on current AMD cpus.
+    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on AMD cpus < 17h.
       FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
 #endif // COMPILER2
+
+    // Some defaults for AMD family 17h
+    if ( cpu_family() == 0x17 ) {
+      // On family 17h processors use XMM and UnalignedLoadStores for Array Copy
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
+      }
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
+      }
+#ifdef COMPILER2
+      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+      }
+#endif
+    }
   }
 
   if( is_intel() ) { // Intel cpus specific settings
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp	Tue Oct 03 21:21:35 2017 +0000
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp	Tue Oct 03 15:32:27 2017 -0700
@@ -228,6 +228,15 @@
     } bits;
   };
 
+  union ExtCpuid1EEbx {  // view of the EBX word saved for cpuid function 0x8000001E
+    uint32_t value;      // the whole 32-bit register value
+    struct {
+      uint32_t                  : 8,   // 8 unnamed bits, not decoded here
+               threads_per_core : 8,   // 8-bit field; the reader adds 1 to get the thread count
+                                : 16;  // 16 unnamed bits, not decoded here
+    } bits;
+  };
+
   union XemXcr0Eax {
     uint32_t value;
     struct {
@@ -398,6 +407,12 @@
     ExtCpuid8Ecx ext_cpuid8_ecx;
     uint32_t     ext_cpuid8_edx; // reserved
 
+    // cpuid function 0x8000001E // AMD 17h
+    uint32_t      ext_cpuid1E_eax;
+    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
+    uint32_t      ext_cpuid1E_ecx;
+    uint32_t      ext_cpuid1E_edx; // unused currently
+
     // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
     XemXcr0Eax   xem_xcr0_eax;
     uint32_t     xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@
       result |= CPU_CLMUL;
     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
       result |= CPU_RTM;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+       result |= CPU_ADX;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+      result |= CPU_BMI2;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+      result |= CPU_SHA;
+    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+      result |= CPU_FMA;
 
     // AMD features.
     if (is_amd()) {
@@ -518,16 +541,8 @@
     }
     // Intel features.
     if(is_intel()) {
-      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
-         result |= CPU_ADX;
-      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
-        result |= CPU_BMI2;
-      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
-        result |= CPU_SHA;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
-      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
-        result |= CPU_FMA;
       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
         result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@
   static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+  static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
   static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,12 @@
     if (is_intel() && supports_processor_topology()) {
       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
     } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
-      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
-               cores_per_cpu();
+      if (cpu_family() >= 0x17) {
+        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
+      } else {
+        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+                 cores_per_cpu();
+      }
     }
     return (result == 0 ? 1 : result);
   }