mirror of
https://github.com/luscis/openlan.git
synced 2025-11-02 13:24:02 +08:00
clone from danieldin95
This commit is contained in:
425
vendor/github.com/templexxx/cpu/cpu_x86.go
generated
vendored
Normal file
425
vendor/github.com/templexxx/cpu/cpu_x86.go
generated
vendored
Normal file
@@ -0,0 +1,425 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// CacheLineSize is the cache line size this package assumes for x86 CPUs
// (presumably in bytes; the unit is not stated in this file).
const CacheLineSize = 64
|
||||
|
||||
// cpuid is implemented in cpu_x86.s.
|
||||
// cpuid has no Go body; callers in this file pass the CPUID leaf as eaxArg and
// the sub-leaf index as ecxArg, and read the four result registers.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
|
||||
|
||||
// xgetbv with ecx = 0 is implemented in cpu_x86.s.
|
||||
// xgetbv has no Go body; doinit uses the returned eax to decide whether the OS
// has enabled the XMM/YMM/ZMM register state.
func xgetbv() (eax, edx uint32)
|
||||
|
||||
// Feature bit tested against EDX of CPUID leaf 1.
const (
	cpuid_SSE2 = 1 << 26
)

// Feature bits tested against ECX of CPUID leaf 1.
const (
	cpuid_SSE3      = 1 << 0
	cpuid_PCLMULQDQ = 1 << 1
	cpuid_SSSE3     = 1 << 9
	cpuid_FMA       = 1 << 12
	cpuid_SSE41     = 1 << 19
	cpuid_SSE42     = 1 << 20
	cpuid_POPCNT    = 1 << 23
	cpuid_AES       = 1 << 25
	cpuid_OSXSAVE   = 1 << 27
	cpuid_AVX       = 1 << 28
)

// Feature bits tested against EBX of CPUID leaf 7, sub-leaf 0.
const (
	cpuid_BMI1     = 1 << 3
	cpuid_AVX2     = 1 << 5
	cpuid_BMI2     = 1 << 8
	cpuid_ERMS     = 1 << 9
	cpuid_AVX512F  = 1 << 16
	cpuid_AVX512DQ = 1 << 17
	cpuid_ADX      = 1 << 19
	cpuid_AVX512BW = 1 << 30
	cpuid_AVX512VL = 1 << 31
)

// Feature bit tested against EDX of CPUID extended leaf 0x80000007.
const (
	cpuid_Invariant_TSC = 1 << 8
)
|
||||
|
||||
func doinit() {
|
||||
options = []option{
|
||||
{"adx", &X86.HasADX},
|
||||
{"aes", &X86.HasAES},
|
||||
{"avx", &X86.HasAVX},
|
||||
{"avx2", &X86.HasAVX2},
|
||||
{"bmi1", &X86.HasBMI1},
|
||||
{"bmi2", &X86.HasBMI2},
|
||||
{"erms", &X86.HasERMS},
|
||||
{"fma", &X86.HasFMA},
|
||||
{"pclmulqdq", &X86.HasPCLMULQDQ},
|
||||
{"popcnt", &X86.HasPOPCNT},
|
||||
{"sse3", &X86.HasSSE3},
|
||||
{"sse41", &X86.HasSSE41},
|
||||
{"sse42", &X86.HasSSE42},
|
||||
{"ssse3", &X86.HasSSSE3},
|
||||
{"avx512f", &X86.HasAVX512F},
|
||||
{"avx512dq", &X86.HasAVX512DQ},
|
||||
{"avx512bw", &X86.HasAVX512BW},
|
||||
{"avx512vl", &X86.HasAVX512VL},
|
||||
{"invariant_tsc", &X86.HasInvariantTSC},
|
||||
|
||||
// sse2 set as last element so it can easily be removed again. See code below.
|
||||
{"sse2", &X86.HasSSE2},
|
||||
}
|
||||
|
||||
// Remove sse2 from options on amd64(p32) because SSE2 is a mandatory feature for these GOARCHs.
|
||||
if GOARCH == "amd64" || GOARCH == "amd64p32" {
|
||||
options = options[:len(options)-1]
|
||||
}
|
||||
|
||||
maxID, _, _, _ := cpuid(0, 0)
|
||||
|
||||
if maxID < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
_, _, ecx1, edx1 := cpuid(1, 0)
|
||||
X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
|
||||
|
||||
X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
|
||||
X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
|
||||
X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
|
||||
X86.HasFMA = isSet(ecx1, cpuid_FMA)
|
||||
X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
|
||||
X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
|
||||
X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
|
||||
X86.HasAES = isSet(ecx1, cpuid_AES)
|
||||
X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
|
||||
|
||||
osSupportsAVX := false
|
||||
osSupportsAVX512 := false
|
||||
// For XGETBV, OSXSAVE bit is required and sufficient.
|
||||
if X86.HasOSXSAVE {
|
||||
eax, _ := xgetbv()
|
||||
// Check if XMM and YMM registers have OS support.
|
||||
osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
|
||||
// Check is ZMM registers have OS support.
|
||||
osSupportsAVX512 = isSet(eax>>5, 7) && isSet(eax>>1, 3)
|
||||
}
|
||||
|
||||
X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
|
||||
|
||||
if maxID < 7 {
|
||||
return
|
||||
}
|
||||
|
||||
_, ebx7, _, _ := cpuid(7, 0)
|
||||
X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
|
||||
X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
|
||||
X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512
|
||||
X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ) && osSupportsAVX512
|
||||
X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW) && osSupportsAVX512
|
||||
X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL) && osSupportsAVX512
|
||||
X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
|
||||
X86.HasERMS = isSet(ebx7, cpuid_ERMS)
|
||||
X86.HasADX = isSet(ebx7, cpuid_ADX)
|
||||
|
||||
X86.Cache = getCacheSize()
|
||||
|
||||
X86.HasInvariantTSC = hasInvariantTSC()
|
||||
|
||||
X86.Family, X86.Model = getFamilyModel()
|
||||
|
||||
X86.Signature = makeSignature(X86.Family, X86.Model)
|
||||
|
||||
X86.Name = getName()
|
||||
|
||||
X86.TSCFrequency = getNativeTSCFrequency(X86.Name, X86.Signature)
|
||||
}
|
||||
|
||||
// isSet reports whether hwc and value have at least one set bit in common.
// With a single-bit value this is a plain "is this flag set" test; with a
// multi-bit value it is true as soon as any one of those bits is set.
func isSet(hwc uint32, value uint32) bool {
	common := hwc & value
	return common != 0
}
|
||||
|
||||
func hasInvariantTSC() bool {
|
||||
if maxExtendedFunction() < 0x80000007 {
|
||||
return false
|
||||
}
|
||||
_, _, _, edx := cpuid(0x80000007, 0)
|
||||
return isSet(edx, cpuid_Invariant_TSC)
|
||||
}
|
||||
|
||||
func getName() string {
|
||||
if maxExtendedFunction() >= 0x80000004 {
|
||||
v := make([]uint32, 0, 48)
|
||||
for i := uint32(0); i < 3; i++ {
|
||||
a, b, c, d := cpuid(0x80000002+i, 0)
|
||||
v = append(v, a, b, c, d)
|
||||
}
|
||||
return strings.Trim(string(valAsString(v...)), " ")
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// getNativeTSCFrequency gets TSC frequency from CPUID,
|
||||
// only supports Intel (Skylake or later microarchitecture) & key information is from Intel manual & kernel codes
|
||||
// (especially this commit: https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684).
|
||||
func getNativeTSCFrequency(name, sign string) uint64 {
|
||||
|
||||
if vendorID() != Intel {
|
||||
return 0
|
||||
}
|
||||
|
||||
if maxFunctionID() < 0x15 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// ApolloLake, GeminiLake, CannonLake (and presumably all new chipsets
|
||||
// from this point) report the crystal frequency directly via CPUID.0x15.
|
||||
// That's definitive data that we can rely upon.
|
||||
eax, ebx, ecx, _ := cpuid(0x15, 0)
|
||||
|
||||
// If ebx is 0, the TSC/”core crystal clock” ratio is not enumerated.
|
||||
// We won't provide TSC frequency detection in this situation.
|
||||
if eax == 0 || ebx == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Skylake, Kabylake and all variants of those two chipsets report a
|
||||
// crystal frequency of zero.
|
||||
if ecx == 0 { // Crystal clock frequency is not enumerated.
|
||||
ecx = getCrystalClockFrequency(sign)
|
||||
}
|
||||
|
||||
// TSC frequency = “core crystal clock frequency” * EBX/EAX.
|
||||
return uint64(ecx) * (uint64(ebx) / uint64(eax))
|
||||
}
|
||||
|
||||
// CPUID signatures (DisplayFamily_DisplayModel) of the Intel processors that
// getCrystalClockFrequency knows a crystal clock frequency for.
//
// Values are copied from "CPUID Signature values of DisplayFamily and
// DisplayModel" in the Intel® 64 and IA-32 Architectures Software Developer's
// Manual, Volume 4: Model-Specific Registers, and from
// https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/intel-family.h
const (
	// Skylake family.
	IntelFam6Skylake  = "06_5EH"
	IntelFam6SkylakeL = "06_4EH"
	IntelFam6SkylakeX = "06_55H"

	// Kabylake family.
	IntelFam6Kabylake  = "06_9EH"
	IntelFam6KabylakeL = "06_8EH"
)
|
||||
|
||||
// getCrystalClockFrequency gets crystal clock frequency
|
||||
// for Intel processors in which CPUID.15H.EBX[31:0] ÷ CPUID.0x15.EAX[31:0] is enumerated
|
||||
// but CPUID.15H.ECX is not enumerated using this function to get nominal core crystal clock frequency.
|
||||
//
|
||||
// Actually these crystal clock frequencies provided by Intel hardcoded tables are not so accurate in some cases,
|
||||
// e.g. SkyLake server CPU may have issue (All SKX subject the crystal to an EMI reduction circuit that
|
||||
//reduces its actual frequency by (approximately) -0.25%):
|
||||
// see https://lore.kernel.org/lkml/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com/
|
||||
// for more details.
|
||||
// With this report, I set a coefficient (0.9975) for IntelFam6SkyLakeX.
|
||||
//
|
||||
// Unlike the kernel way (mentioned in https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684),
|
||||
// I prefer the Intel hardcoded tables,
|
||||
// because after some testing (comparing with wall clock, see https://github.com/templexxx/tsc/tsc_test.go for more details),
|
||||
// I found hardcoded tables are more accurate.
|
||||
func getCrystalClockFrequency(sign string) uint32 {
|
||||
|
||||
if maxFunctionID() < 0x16 {
|
||||
return 0
|
||||
}
|
||||
|
||||
switch sign {
|
||||
case IntelFam6SkylakeL:
|
||||
return 24 * 1000 * 1000
|
||||
case IntelFam6Skylake:
|
||||
return 24 * 1000 * 1000
|
||||
case IntelFam6SkylakeX:
|
||||
return 25 * 1000 * 1000 * 0.9975
|
||||
case IntelFam6KabylakeL:
|
||||
return 24 * 1000 * 1000
|
||||
case IntelFam6Kabylake:
|
||||
return 24 * 1000 * 1000
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func getFamilyModel() (uint32, uint32) {
|
||||
if maxFunctionID() < 0x1 {
|
||||
return 0, 0
|
||||
}
|
||||
eax, _, _, _ := cpuid(1, 0)
|
||||
family := (eax >> 8) & 0xf
|
||||
displayFamily := family
|
||||
if family == 0xf {
|
||||
displayFamily = ((eax >> 20) & 0xff) + family
|
||||
}
|
||||
model := (eax >> 4) & 0xf
|
||||
displayModel := model
|
||||
if family == 0x6 || family == 0xf {
|
||||
displayModel = ((eax >> 12) & 0xf0) + model
|
||||
}
|
||||
return displayFamily, displayModel
|
||||
}
|
||||
|
||||
// makeSignature formats a display family and display model as the CPUID
// signature string "XX_XXH": both values in upper-case hexadecimal, each
// zero-padded to at least two digits, e.g. family 0x6 / model 0x4e gives
// "06_4EH" and family 0x6 / model 0xf gives "06_0FH".
//
// Values that need more than two hex digits are written in full.
func makeSignature(family, model uint32) string {
	// %02X pads to two digits without truncating wider values, so the model
	// keeps its leading zero when it is below 0x10.
	return fmt.Sprintf("%02X_%02XH", family, model)
}
|
||||
|
||||
// getCacheSize is from
|
||||
// https://github.com/klauspost/cpuid/blob/5a626f7029c910cc8329dae5405ee4f65034bce5/cpuid.go#L723
|
||||
func getCacheSize() Cache {
|
||||
c := Cache{
|
||||
L1I: -1,
|
||||
L1D: -1,
|
||||
L2: -1,
|
||||
L3: -1,
|
||||
}
|
||||
|
||||
vendor := vendorID()
|
||||
switch vendor {
|
||||
case Intel:
|
||||
if maxFunctionID() < 4 {
|
||||
return c
|
||||
}
|
||||
for i := uint32(0); ; i++ {
|
||||
eax, ebx, ecx, _ := cpuid(4, i)
|
||||
cacheType := eax & 15
|
||||
if cacheType == 0 {
|
||||
break
|
||||
}
|
||||
cacheLevel := (eax >> 5) & 7
|
||||
coherency := int(ebx&0xfff) + 1
|
||||
partitions := int((ebx>>12)&0x3ff) + 1
|
||||
associativity := int((ebx>>22)&0x3ff) + 1
|
||||
sets := int(ecx) + 1
|
||||
size := associativity * partitions * coherency * sets
|
||||
switch cacheLevel {
|
||||
case 1:
|
||||
if cacheType == 1 {
|
||||
// 1 = Data Cache
|
||||
c.L1D = size
|
||||
} else if cacheType == 2 {
|
||||
// 2 = Instruction Cache
|
||||
c.L1I = size
|
||||
} else {
|
||||
if c.L1D < 0 {
|
||||
c.L1I = size
|
||||
}
|
||||
if c.L1I < 0 {
|
||||
c.L1I = size
|
||||
}
|
||||
}
|
||||
case 2:
|
||||
c.L2 = size
|
||||
case 3:
|
||||
c.L3 = size
|
||||
}
|
||||
}
|
||||
case AMD, Hygon:
|
||||
// Untested.
|
||||
if maxExtendedFunction() < 0x80000005 {
|
||||
return c
|
||||
}
|
||||
_, _, ecx, edx := cpuid(0x80000005, 0)
|
||||
c.L1D = int(((ecx >> 24) & 0xFF) * 1024)
|
||||
c.L1I = int(((edx >> 24) & 0xFF) * 1024)
|
||||
|
||||
if maxExtendedFunction() < 0x80000006 {
|
||||
return c
|
||||
}
|
||||
_, _, ecx, _ = cpuid(0x80000006, 0)
|
||||
c.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func maxFunctionID() uint32 {
|
||||
a, _, _, _ := cpuid(0, 0)
|
||||
return a
|
||||
}
|
||||
|
||||
func maxExtendedFunction() uint32 {
|
||||
eax, _, _, _ := cpuid(0x80000000, 0)
|
||||
return eax
|
||||
}
|
||||
|
||||
// Vendor identifiers returned by vendorID. Other is the zero value and is
// used for any vendor string that vendorMapping does not list.
const (
	Other = iota

	// Hardware vendors.
	Intel
	AMD
	VIA
	Transmeta
	NSC

	// Kernel-based Virtual Machine.
	KVM
	// Microsoft Hyper-V or Windows Virtual PC.
	MSVM

	VMware
	XenHVM
	Bhyve
	Hygon
)
|
||||
|
||||
// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
|
||||
var vendorMapping = map[string]int{
|
||||
"AMDisbetter!": AMD,
|
||||
"AuthenticAMD": AMD,
|
||||
"CentaurHauls": VIA,
|
||||
"GenuineIntel": Intel,
|
||||
"TransmetaCPU": Transmeta,
|
||||
"GenuineTMx86": Transmeta,
|
||||
"Geode by NSC": NSC,
|
||||
"VIA VIA VIA ": VIA,
|
||||
"KVMKVMKVMKVM": KVM,
|
||||
"Microsoft Hv": MSVM,
|
||||
"VMwareVMware": VMware,
|
||||
"XenVMMXenVMM": XenHVM,
|
||||
"bhyve bhyve ": Bhyve,
|
||||
"HygonGenuine": Hygon,
|
||||
}
|
||||
|
||||
func vendorID() int {
|
||||
_, b, c, d := cpuid(0, 0)
|
||||
v := valAsString(b, d, c)
|
||||
vend, ok := vendorMapping[string(v)]
|
||||
if !ok {
|
||||
return Other
|
||||
}
|
||||
return vend
|
||||
}
|
||||
|
||||
// valAsString unpacks each value into four bytes, least significant byte
// first, and concatenates them in argument order. The result is cut off just
// before the first zero byte, so it holds only the bytes preceding the first
// NUL (or all 4*len(values) bytes when there is none).
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
|
||||
Reference in New Issue
Block a user