8194279: support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai
Reviewed-by: dholmes, kvn
Contributed-by: Vic Wang <vicwang@zhaoxin.com>
--- a/src/hotspot/cpu/x86/assembler_x86.cpp Thu Jan 04 18:18:18 2018 -0500
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp Thu Jan 04 22:54:40 2018 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -3167,6 +3167,89 @@
return;
}
+ if (UseAddressNop && VM_Version::is_zx()) {
+ //
+ // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
+ // 1: 0x90
+ // 2: 0x66 0x90
+ // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
+ // 4: 0x0F 0x1F 0x40 0x00
+ // 5: 0x0F 0x1F 0x44 0x00 0x00
+ // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
+ // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
+ // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+ // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+ // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+ // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+
+ // The remaining encodings are ZX specific - don't use consecutive address nops
+
+ // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+ // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+ // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+ // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+
+ while (i >= 15) {
+ // For ZX don't generate consecutive address nops (mix with regular nops)
+ i -= 15;
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ addr_nop_8();
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8((unsigned char)0x90);
+ // nop
+ }
+ switch (i) {
+ case 14:
+ emit_int8(0x66); // size prefix
+ case 13:
+ emit_int8(0x66); // size prefix
+ case 12:
+ addr_nop_8();
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8((unsigned char)0x90);
+ // nop
+ break;
+ case 11:
+ emit_int8(0x66); // size prefix
+ case 10:
+ emit_int8(0x66); // size prefix
+ case 9:
+ emit_int8(0x66); // size prefix
+ case 8:
+ addr_nop_8();
+ break;
+ case 7:
+ addr_nop_7();
+ break;
+ case 6:
+ emit_int8(0x66); // size prefix
+ case 5:
+ addr_nop_5();
+ break;
+ case 4:
+ addr_nop_4();
+ break;
+ case 3:
+ // Don't use "0x0F 0x1F 0x00" - need patching safe padding
+ emit_int8(0x66); // size prefix
+ case 2:
+ emit_int8(0x66); // size prefix
+ case 1:
+ emit_int8((unsigned char)0x90);
+ // nop
+ break;
+ default:
+ assert(i == 0, " ");
+ }
+ return;
+ }
+
// Using nops with size prefixes "0x66 0x90".
// From AMD Optimization Guide:
// 1: 0x90
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp Thu Jan 04 18:18:18 2018 -0500
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp Thu Jan 04 22:54:40 2018 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -628,6 +628,11 @@
if (UseSSE < 1)
_features &= ~CPU_SSE;
+ // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
+ if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
+ UseAVX = 0;
+ }
+
// first try initial setting and detect what we can support
int use_avx_limit = 0;
if (UseAVX > 0) {
@@ -1078,6 +1083,66 @@
// UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
// UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
+
+ if (is_zx()) { // ZX cpus specific settings
+ if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
+ UseStoreImmI16 = false; // don't use it on ZX cpus
+ }
+ if ((cpu_family() == 6) || (cpu_family() == 7)) {
+ if (FLAG_IS_DEFAULT(UseAddressNop)) {
+ // Use it on all ZX cpus
+ UseAddressNop = true;
+ }
+ }
+ if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
+ UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
+ }
+ if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
+ if (supports_sse3()) {
+ UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
+ } else {
+ UseXmmRegToRegMoveAll = false;
+ }
+ }
+ if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
+#ifdef COMPILER2
+ if (FLAG_IS_DEFAULT(MaxLoopPad)) {
+ // For new ZX cpus do the next optimization:
+ // don't align the beginning of a loop if there are enough instructions
+ // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
+ // in current fetch line (OptoLoopAlignment) or the padding
+ // is big (> MaxLoopPad).
+ // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
+ // generated NOP instructions. 11 is the largest size of one
+ // address NOP instruction '0F 1F' (see Assembler::nop(i)).
+ MaxLoopPad = 11;
+ }
+#endif // COMPILER2
+ if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+ UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
+ }
+ if (supports_sse4_2()) { // new ZX cpus
+ if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+ UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
+ }
+ }
+ if (supports_sse4_2()) { // SSE4.2 present: turn the intrinsics on unless the flag was set explicitly
+ if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
+ }
+ } else { // no SSE4.2: warn only when UseSSE42Intrinsics itself was requested explicitly
+ if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
+ warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
+ }
+ }
+
+ if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
+ FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
+ }
+ }
+
if( is_amd() ) { // AMD cpus specific settings
if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
// Use it on new AMD cpus starting from Opteron.
@@ -1374,6 +1439,14 @@
#endif
}
+ if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
+#ifdef COMPILER2
+ if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+ }
+#endif
+ }
+
#ifdef _LP64
// Prefetch settings
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp Thu Jan 04 18:18:18 2018 -0500
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp Thu Jan 04 22:54:40 2018 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -305,6 +305,9 @@
enum Extended_Family {
// AMD
CPU_FAMILY_AMD_11H = 0x11,
+ // ZX
+ CPU_FAMILY_ZX_CORE_F6 = 6,
+ CPU_FAMILY_ZX_CORE_F7 = 7,
// Intel
CPU_FAMILY_INTEL_CORE = 6,
CPU_MODEL_NEHALEM = 0x1e,
@@ -549,6 +552,16 @@
}
}
+ // ZX features.
+ if (is_zx()) {
+ if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
+ result |= CPU_LZCNT;
+ // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
+ if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
+ result |= CPU_3DNOW_PREFETCH;
+ }
+ }
+
return result;
}
@@ -657,6 +670,7 @@
static bool is_P6() { return cpu_family() >= 6; }
static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
+ static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS '
static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
@@ -680,6 +694,15 @@
}
} else if (is_amd()) {
result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
+ } else if (is_zx()) {
+ bool supports_topology = supports_processor_topology();
+ if (supports_topology) {
+ result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
+ _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+ }
+ if (!supports_topology || result == 0) {
+ result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+ }
}
return result;
}
@@ -688,6 +711,8 @@
uint result = 1;
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+ } else if (is_zx() && supports_processor_topology()) {
+ result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
if (cpu_family() >= 0x17) {
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
@@ -705,6 +730,8 @@
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
} else if (is_amd()) {
result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
+ } else if (is_zx()) {
+ result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
}
if (result < 32) // not defined ?
result = 32; // 32 bytes by default on x86 and other x64