8194279: support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai
authordholmes
Thu, 04 Jan 2018 22:54:40 -0500
changeset 48489 a5548cf24286
parent 48488 51825789dd89
child 48490 4f647519c8be
child 48611 4d7a4fad8190
8194279: support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai Reviewed-by: dholmes, kvn Contributed-by: Vic Wang <vicwang@zhaoxin.com>
src/hotspot/cpu/x86/assembler_x86.cpp
src/hotspot/cpu/x86/vm_version_x86.cpp
src/hotspot/cpu/x86/vm_version_x86.hpp
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Thu Jan 04 18:18:18 2018 -0500
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Thu Jan 04 22:54:40 2018 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3167,6 +3167,89 @@
     return;
   }
 
+  if (UseAddressNop && VM_Version::is_zx()) {
+    //
+    // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
+    //  1: 0x90
+    //  2: 0x66 0x90
+    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
+    //  4: 0x0F 0x1F 0x40 0x00
+    //  5: 0x0F 0x1F 0x44 0x00 0x00
+    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
+    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
+    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+
+    // The rest coding is ZX specific - don't use consecutive address nops
+
+    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+
+    while (i >= 15) {
+      // For ZX don't generate consecutive addess nops (mix with regular nops)
+      i -= 15;
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      addr_nop_8();
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8((unsigned char)0x90);
+                         // nop
+    }
+    switch (i) {
+      case 14:
+        emit_int8(0x66); // size prefix
+      case 13:
+        emit_int8(0x66); // size prefix
+      case 12:
+        addr_nop_8();
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8((unsigned char)0x90);
+                         // nop
+        break;
+      case 11:
+        emit_int8(0x66); // size prefix
+      case 10:
+        emit_int8(0x66); // size prefix
+      case 9:
+        emit_int8(0x66); // size prefix
+      case 8:
+        addr_nop_8();
+        break;
+      case 7:
+        addr_nop_7();
+        break;
+      case 6:
+        emit_int8(0x66); // size prefix
+      case 5:
+        addr_nop_5();
+        break;
+      case 4:
+        addr_nop_4();
+        break;
+      case 3:
+        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
+        emit_int8(0x66); // size prefix
+      case 2:
+        emit_int8(0x66); // size prefix
+      case 1:
+        emit_int8((unsigned char)0x90);
+                         // nop
+        break;
+      default:
+        assert(i == 0, " ");
+    }
+    return;
+  }
+
   // Using nops with size prefixes "0x66 0x90".
   // From AMD Optimization Guide:
   //  1: 0x90
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Thu Jan 04 18:18:18 2018 -0500
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Thu Jan 04 22:54:40 2018 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -628,6 +628,11 @@
   if (UseSSE < 1)
     _features &= ~CPU_SSE;
 
+  //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
+  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
+    UseAVX = 0;
+  }
+
   // first try initial setting and detect what we can support
   int use_avx_limit = 0;
   if (UseAVX > 0) {
@@ -1078,6 +1083,66 @@
   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
 
+
+  if (is_zx()) { // ZX cpus specific settings
+    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
+      UseStoreImmI16 = false; // don't use it on ZX cpus
+    }
+    if ((cpu_family() == 6) || (cpu_family() == 7)) {
+      if (FLAG_IS_DEFAULT(UseAddressNop)) {
+        // Use it on all ZX cpus
+        UseAddressNop = true;
+      }
+    }
+    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
+      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
+    }
+    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
+      if (supports_sse3()) {
+        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
+      } else {
+        UseXmmRegToRegMoveAll = false;
+      }
+    }
+    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
+#ifdef COMPILER2
+      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
+        // For new ZX cpus do the next optimization:
+        // don't align the beginning of a loop if there are enough instructions
+        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
+        // in current fetch line (OptoLoopAlignment) or the padding
+        // is big (> MaxLoopPad).
+        // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
+        // generated NOP instructions. 11 is the largest size of one
+        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
+        MaxLoopPad = 11;
+      }
+#endif // COMPILER2
+      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
+      }
+      if (supports_sse4_2()) { // new ZX cpus
+        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
+        }
+      }
+      if (supports_sse4_2()) {
+        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
+          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
+        }
+      } else {
+        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
+        }
+        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
+      }
+    }
+
+    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
+      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
+    }
+  }
+
   if( is_amd() ) { // AMD cpus specific settings
     if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
       // Use it on new AMD cpus starting from Opteron.
@@ -1374,6 +1439,14 @@
 #endif
   }
 
+  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
+#ifdef COMPILER2
+    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+    }
+#endif
+  }
+
 #ifdef _LP64
   // Prefetch settings
 
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp	Thu Jan 04 18:18:18 2018 -0500
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp	Thu Jan 04 22:54:40 2018 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -305,6 +305,9 @@
   enum Extended_Family {
     // AMD
     CPU_FAMILY_AMD_11H       = 0x11,
+    // ZX
+    CPU_FAMILY_ZX_CORE_F6    = 6,
+    CPU_FAMILY_ZX_CORE_F7    = 7,
     // Intel
     CPU_FAMILY_INTEL_CORE    = 6,
     CPU_MODEL_NEHALEM        = 0x1e,
@@ -549,6 +552,16 @@
       }
     }
 
+    // ZX features.
+    if (is_zx()) {
+      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
+        result |= CPU_LZCNT;
+      // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
+      if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
+        result |= CPU_3DNOW_PREFETCH;
+      }
+    }
+
     return result;
   }
 
@@ -657,6 +670,7 @@
   static bool is_P6()             { return cpu_family() >= 6; }
   static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
   static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
+  static bool is_zx()             { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS  '
   static bool is_atom_family()    { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
   static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
 
@@ -680,6 +694,15 @@
       }
     } else if (is_amd()) {
       result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
+    } else if (is_zx()) {
+      bool supports_topology = supports_processor_topology();
+      if (supports_topology) {
+        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
+                 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+      }
+      if (!supports_topology || result == 0) {
+        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+      }
     }
     return result;
   }
@@ -688,6 +711,8 @@
     uint result = 1;
     if (is_intel() && supports_processor_topology()) {
       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+    } else if (is_zx() && supports_processor_topology()) {
+      result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
     } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
       if (cpu_family() >= 0x17) {
         result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
@@ -705,6 +730,8 @@
       result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
     } else if (is_amd()) {
       result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
+    } else if (is_zx()) {
+      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
     }
     if (result < 32) // not defined ?
       result = 32;   // 32 bytes by default on x86 and other x64