1 /* |
1 /* |
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. |
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * |
4 * |
5 * This code is free software; you can redistribute it and/or modify it |
5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as |
6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. |
7 * published by the Free Software Foundation. |
174 uint32_t cores_per_cpu : 8, |
174 uint32_t cores_per_cpu : 8, |
175 : 24; |
175 : 24; |
176 } bits; |
176 } bits; |
177 }; |
177 }; |
178 |
178 |
|
// EDX word of cpuid function 0x80000007.
// The word can be read whole through 'value' or field-wise through 'bits'.
// Only the TSC-invariance flag is named; the remaining bits are padding.
union ExtCpuid7Edx {
  uint32_t value;                  // raw register contents
  struct {
    uint32_t                : 8,   // unnamed bits preceding the flag
             tsc_invariance : 1,   // non-zero when invariant TSC is reported
                            : 23;  // unnamed remainder of the 32-bit word
  } bits;
};
|
187 |
179 protected: |
188 protected: |
180 static int _cpu; |
189 static int _cpu; |
181 static int _model; |
190 static int _model; |
182 static int _stepping; |
191 static int _stepping; |
183 static int _cpuFeatures; // features returned by the "cpuid" instruction |
192 static int _cpuFeatures; // features returned by the "cpuid" instruction |
184 // 0 if this instruction is not available |
193 // 0 if this instruction is not available |
185 static const char* _features_str; |
194 static const char* _features_str; |
186 |
195 |
// Bit masks stored in _cpuFeatures; each flag occupies one distinct bit.
enum {
  CPU_CX8            = (1 << 0), // next bits are from cpuid 1 (EDX)
  CPU_CMOV           = (1 << 1),
  CPU_FXSR           = (1 << 2),
  CPU_HT             = (1 << 3),
  CPU_MMX            = (1 << 4),
  CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
                                 // may not necessarily support other 3dnow instructions
  CPU_SSE            = (1 << 6),
  CPU_SSE2           = (1 << 7),
  CPU_SSE3           = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
  CPU_SSSE3          = (1 << 9),
  CPU_SSE4A          = (1 << 10),
  CPU_SSE4_1         = (1 << 11),
  CPU_SSE4_2         = (1 << 12),
  CPU_POPCNT         = (1 << 13),
  CPU_LZCNT          = (1 << 14),
  CPU_TSC            = (1 << 15),
  CPU_TSCINV         = (1 << 16)
} cpuFeatureFlags;
|
216 |
|
// Family and model numbers compared against extended_cpu_family() and
// extended_cpu_model() by the vendor-specific predicates of this class.
enum {
  // AMD
  CPU_FAMILY_AMD_11H       = 17,
  // Intel
  CPU_FAMILY_INTEL_CORE    = 6,
  CPU_MODEL_NEHALEM_EP     = 26,
  CPU_MODEL_WESTMERE_EP    = 44,
//  CPU_MODEL_IVYBRIDGE_EP   = ??, TODO - get real value
  CPU_MODEL_SANDYBRIDGE_EP = 45
} cpuExtendedFamily;
205 |
227 |
206 // cpuid information block. All info derived from executing cpuid with |
228 // cpuid information block. All info derived from executing cpuid with |
207 // various function numbers is stored here. Intel and AMD info is |
229 // various function numbers is stored here. Intel and AMD info is |
208 // merged in this block: accessor methods disentangle it. |
230 // merged in this block: accessor methods disentangle it. |
209 // |
231 // |
268 uint32_t ext_cpuid5_eax; // unused currently |
290 uint32_t ext_cpuid5_eax; // unused currently |
269 uint32_t ext_cpuid5_ebx; // reserved |
291 uint32_t ext_cpuid5_ebx; // reserved |
270 ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) |
292 ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) |
271 ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) |
293 ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) |
272 |
294 |
|
295 // cpuid function 0x80000007 |
|
296 uint32_t ext_cpuid7_eax; // reserved |
|
297 uint32_t ext_cpuid7_ebx; // reserved |
|
298 uint32_t ext_cpuid7_ecx; // reserved |
|
299 ExtCpuid7Edx ext_cpuid7_edx; // tscinv |
|
300 |
273 // cpuid function 0x80000008 |
301 // cpuid function 0x80000008 |
274 uint32_t ext_cpuid8_eax; // unused currently |
302 uint32_t ext_cpuid8_eax; // unused currently |
275 uint32_t ext_cpuid8_ebx; // reserved |
303 uint32_t ext_cpuid8_ebx; // reserved |
276 ExtCpuid8Ecx ext_cpuid8_ecx; |
304 ExtCpuid8Ecx ext_cpuid8_ecx; |
277 uint32_t ext_cpuid8_edx; // reserved |
305 uint32_t ext_cpuid8_edx; // reserved |
284 static uint32_t extended_cpu_family() { |
312 static uint32_t extended_cpu_family() { |
285 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; |
313 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; |
286 result += _cpuid_info.std_cpuid1_eax.bits.ext_family; |
314 result += _cpuid_info.std_cpuid1_eax.bits.ext_family; |
287 return result; |
315 return result; |
288 } |
316 } |
|
317 |
289 static uint32_t extended_cpu_model() { |
318 static uint32_t extended_cpu_model() { |
290 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; |
319 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; |
291 result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; |
320 result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; |
292 return result; |
321 return result; |
293 } |
322 } |
|
323 |
294 static uint32_t cpu_stepping() { |
324 static uint32_t cpu_stepping() { |
295 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; |
325 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; |
296 return result; |
326 return result; |
297 } |
327 } |
|
328 |
298 static uint logical_processor_count() { |
329 static uint logical_processor_count() { |
299 uint result = threads_per_core(); |
330 uint result = threads_per_core(); |
300 return result; |
331 return result; |
301 } |
332 } |
|
333 |
302 static uint32_t feature_flags() { |
334 static uint32_t feature_flags() { |
303 uint32_t result = 0; |
335 uint32_t result = 0; |
304 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) |
336 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) |
305 result |= CPU_CX8; |
337 result |= CPU_CX8; |
306 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) |
338 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) |
326 result |= CPU_SSE4_1; |
358 result |= CPU_SSE4_1; |
327 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) |
359 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) |
328 result |= CPU_SSE4_2; |
360 result |= CPU_SSE4_2; |
329 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) |
361 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) |
330 result |= CPU_POPCNT; |
362 result |= CPU_POPCNT; |
|
363 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) |
|
364 result |= CPU_TSC; |
|
365 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) |
|
366 result |= CPU_TSCINV; |
331 |
367 |
332 // AMD features. |
368 // AMD features. |
333 if (is_amd()) { |
369 if (is_amd()) { |
334 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || |
370 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || |
335 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) |
371 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) |
350 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } |
386 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } |
351 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } |
387 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } |
352 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } |
388 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } |
353 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } |
389 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } |
354 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } |
390 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } |
|
391 static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); } |
355 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } |
392 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } |
356 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } |
393 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } |
357 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } |
394 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } |
358 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } |
395 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } |
359 |
396 |
380 // processors. Use the feature test functions below to |
417 // processors. Use the feature test functions below to |
381 // determine whether a particular instruction is supported. |
418 // determine whether a particular instruction is supported. |
382 // |
419 // |
383 static int cpu_family() { return _cpu;} |
420 static int cpu_family() { return _cpu;} |
384 static bool is_P6() { return cpu_family() >= 6; } |
421 static bool is_P6() { return cpu_family() >= 6; } |
385 |
|
386 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' |
422 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' |
387 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' |
423 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' |
388 |
424 |
389 static bool supports_processor_topology() { |
425 static bool supports_processor_topology() { |
390 return (_cpuid_info.std_max_function >= 0xB) && |
426 return (_cpuid_info.std_max_function >= 0xB) && |
445 static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } |
481 static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } |
446 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } |
482 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } |
447 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } |
483 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } |
448 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } |
484 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } |
449 static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } |
485 static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } |
450 // |
486 static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } |
|
487 |
|
488 // Intel features |
|
489 static bool is_intel_family_core() { return is_intel() && |
|
490 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } |
|
491 |
|
492 static bool is_intel_tsc_synched_at_init() { |
|
493 if (is_intel_family_core()) { |
|
494 uint32_t ext_model = extended_cpu_model(); |
|
495 if (ext_model == CPU_MODEL_NEHALEM_EP || |
|
496 ext_model == CPU_MODEL_WESTMERE_EP || |
|
497 // TODO ext_model == CPU_MODEL_IVYBRIDGE_EP || |
|
498 ext_model == CPU_MODEL_SANDYBRIDGE_EP) { |
|
499 // 2-socket invtsc support. EX versions with 4 sockets are not |
|
500 // guaranteed to synchronize tscs at initialization via a double |
|
501 // handshake. The tscs can be explicitly set in software. Code |
|
502 // that uses tsc values must be prepared for them to arbitrarily |
|
503 // jump backward or forward. |
|
504 return true; |
|
505 } |
|
506 } |
|
507 return false; |
|
508 } |
|
509 |
451 // AMD features |
510 // AMD features |
452 // |
|
453 static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } |
511 static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } |
454 static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } |
512 static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } |
455 static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } |
513 static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } |
456 static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } |
514 static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } |
|
515 |
|
516 static bool is_amd_Barcelona() { return is_amd() && |
|
517 extended_cpu_family() == CPU_FAMILY_AMD_11H; } |
|
518 |
|
519 // Intel and AMD newer cores support fast timestamps well |
|
520 static bool supports_tscinv_bit() { |
|
521 return (_cpuFeatures & CPU_TSCINV) != 0; |
|
522 } |
|
523 static bool supports_tscinv() { |
|
524 return supports_tscinv_bit() && |
|
525 ( (is_amd() && !is_amd_Barcelona()) || |
|
526 is_intel_tsc_synched_at_init() ); |
|
527 } |
457 |
528 |
458 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). |
529 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). |
459 static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && |
530 static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && |
460 supports_sse3() && _model != 0x1C; } |
531 supports_sse3() && _model != 0x1C; } |
461 |
532 |