mirror of
https://github.com/luscis/openlan.git
synced 2025-10-07 17:40:54 +08:00
clone from danieldin95
This commit is contained in:
12
vendor/github.com/templexxx/cpu/.gitignore
generated
vendored
Normal file
12
vendor/github.com/templexxx/cpu/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.exe~
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
32
vendor/github.com/templexxx/cpu/LICENSE
generated
vendored
Normal file
32
vendor/github.com/templexxx/cpu/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2018 Temple3x (temple3x@gmail.com)
|
||||
Copyright 2017 The Go Authors
|
||||
Copyright (c) 2015 Klaus Post
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
23
vendor/github.com/templexxx/cpu/README.md
generated
vendored
Normal file
23
vendor/github.com/templexxx/cpu/README.md
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
# cpu
|
||||
A copy of `internal/cpu` (from the Go standard library) extended with these detections:
|
||||
|
||||
>- AVX512
|
||||
>
|
||||
>- Cache Size
|
||||
>
|
||||
>- Invariant TSC
|
||||
>
|
||||
|
||||
It also provides:
|
||||
|
||||
>- False sharing range, see `X86FalseSharingRange` for X86 platform.
|
||||
>
|
||||
>- TSC frequency
|
||||
>
|
||||
>- Name
|
||||
>
|
||||
>- Family & Model
|
||||
|
||||
# Acknowledgement
|
||||
|
||||
[klauspost/cpuid](https://github.com/klauspost/cpuid)
|
234
vendor/github.com/templexxx/cpu/cpu.go
generated
vendored
Normal file
234
vendor/github.com/templexxx/cpu/cpu.go
generated
vendored
Normal file
@@ -0,0 +1,234 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package cpu implements processor feature detection
|
||||
// used by the Go standard library.
|
||||
package cpu
|
||||
|
||||
// debugOptions is set to true by the runtime if go was compiled with GOEXPERIMENT=debugcpu
|
||||
// and GOOS is Linux or Darwin. This variable is linknamed in runtime/proc.go.
|
||||
var debugOptions bool
|
||||
|
||||
var X86 x86
|
||||
|
||||
// "Loads data or instructions from memory to the second-level cache.
|
||||
// To use the streamer, organize the data or instructions in blocks of 128 bytes,
|
||||
// aligned on 128 bytes."
|
||||
// From <Intel® 64 and IA-32 architectures optimization reference manual>,
|
||||
// in section 3.7.3 "Hardware Prefetching for Second-Level Cache"
|
||||
//
|
||||
// In practice, I have found that using 128 bytes gives better performance than 64 bytes (one cache line).
|
||||
const X86FalseSharingRange = 128
|
||||
|
||||
// The booleans in x86 contain the correspondingly named cpuid feature bit.
|
||||
// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
|
||||
// in addition to the cpuid feature bit being set.
|
||||
// The struct is padded to avoid false sharing.
|
||||
type x86 struct {
|
||||
_ [X86FalseSharingRange]byte
|
||||
HasAES bool
|
||||
HasADX bool
|
||||
HasAVX bool
|
||||
HasAVX2 bool
|
||||
HasAVX512F bool
|
||||
HasAVX512DQ bool
|
||||
HasAVX512BW bool
|
||||
HasAVX512VL bool
|
||||
HasBMI1 bool
|
||||
HasBMI2 bool
|
||||
HasERMS bool
|
||||
HasFMA bool
|
||||
HasOSXSAVE bool
|
||||
HasPCLMULQDQ bool
|
||||
HasPOPCNT bool
|
||||
HasSSE2 bool
|
||||
HasSSE3 bool
|
||||
HasSSSE3 bool
|
||||
HasSSE41 bool
|
||||
HasSSE42 bool
|
||||
// The invariant TSC will run at a constant rate in all ACPI P-, C-, and T-states.
|
||||
// This is the architectural behavior moving forward. On processors with
|
||||
// invariant TSC support, the OS may use the TSC for wall clock timer services (instead of ACPI or HPET timers).
|
||||
HasInvariantTSC bool
|
||||
|
||||
Cache Cache
|
||||
|
||||
// TSCFrequency only meaningful when HasInvariantTSC == true.
|
||||
// Unit: Hz.
|
||||
//
|
||||
// Warn:
|
||||
// 1. If it's 0, means can't get it. Don't use it.
|
||||
// 2. Don't use it if you want "100%" precise timestamp.
|
||||
TSCFrequency uint64
|
||||
|
||||
Name string
|
||||
Signature string // DisplayFamily_DisplayModel.
|
||||
Family uint32 // CPU family number.
|
||||
Model uint32 // CPU model number.
|
||||
|
||||
_ [X86FalseSharingRange]byte
|
||||
}
|
||||
|
||||
// Cache holds the CPU cache sizes.
// A field is -1 if the size is undetected.
type Cache struct {
	L1I int
	L1D int
	L2  int
	L3  int
}
|
||||
|
||||
var PPC64 ppc64
|
||||
|
||||
// For ppc64x, it is safe to check only for ISA level starting on ISA v3.00,
|
||||
// since there are no optional categories. There are some exceptions that also
|
||||
// require kernel support to work (darn, scv), so there are feature bits for
|
||||
// those as well. The minimum processor requirement is POWER8 (ISA 2.07), so we
|
||||
// maintain some of the old feature checks for optional categories for
|
||||
// safety.
|
||||
// The struct is padded to avoid false sharing.
|
||||
type ppc64 struct {
|
||||
_ [CacheLineSize]byte
|
||||
HasVMX bool // Vector unit (Altivec)
|
||||
HasDFP bool // Decimal Floating Point unit
|
||||
HasVSX bool // Vector-scalar unit
|
||||
HasHTM bool // Hardware Transactional Memory
|
||||
HasISEL bool // Integer select
|
||||
HasVCRYPTO bool // Vector cryptography
|
||||
HasHTMNOSC bool // HTM: kernel-aborted transaction in syscalls
|
||||
HasDARN bool // Hardware random number generator (requires kernel enablement)
|
||||
HasSCV bool // Syscall vectored (requires kernel enablement)
|
||||
IsPOWER8 bool // ISA v2.07 (POWER8)
|
||||
IsPOWER9 bool // ISA v3.00 (POWER9)
|
||||
_ [CacheLineSize]byte
|
||||
}
|
||||
|
||||
var ARM64 arm64
|
||||
|
||||
// The booleans in arm64 contain the correspondingly named cpu feature bit.
|
||||
// The struct is padded to avoid false sharing.
|
||||
type arm64 struct {
|
||||
_ [CacheLineSize]byte
|
||||
HasFP bool
|
||||
HasASIMD bool
|
||||
HasEVTSTRM bool
|
||||
HasAES bool
|
||||
HasPMULL bool
|
||||
HasSHA1 bool
|
||||
HasSHA2 bool
|
||||
HasCRC32 bool
|
||||
HasATOMICS bool
|
||||
HasFPHP bool
|
||||
HasASIMDHP bool
|
||||
HasCPUID bool
|
||||
HasASIMDRDM bool
|
||||
HasJSCVT bool
|
||||
HasFCMA bool
|
||||
HasLRCPC bool
|
||||
HasDCPOP bool
|
||||
HasSHA3 bool
|
||||
HasSM3 bool
|
||||
HasSM4 bool
|
||||
HasASIMDDP bool
|
||||
HasSHA512 bool
|
||||
HasSVE bool
|
||||
HasASIMDFHM bool
|
||||
_ [CacheLineSize]byte
|
||||
}
|
||||
|
||||
var S390X s390x
|
||||
|
||||
type s390x struct {
|
||||
_ [CacheLineSize]byte
|
||||
HasZArch bool // z architecture mode is active [mandatory]
|
||||
HasSTFLE bool // store facility list extended [mandatory]
|
||||
HasLDisp bool // long (20-bit) displacements [mandatory]
|
||||
HasEImm bool // 32-bit immediates [mandatory]
|
||||
HasDFP bool // decimal floating point
|
||||
HasETF3Enhanced bool // ETF-3 enhanced
|
||||
HasMSA bool // message security assist (CPACF)
|
||||
HasAES bool // KM-AES{128,192,256} functions
|
||||
HasAESCBC bool // KMC-AES{128,192,256} functions
|
||||
HasAESCTR bool // KMCTR-AES{128,192,256} functions
|
||||
HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
|
||||
HasGHASH bool // KIMD-GHASH function
|
||||
HasSHA1 bool // K{I,L}MD-SHA-1 functions
|
||||
HasSHA256 bool // K{I,L}MD-SHA-256 functions
|
||||
HasSHA512 bool // K{I,L}MD-SHA-512 functions
|
||||
HasVX bool // vector facility. Note: the runtime sets this when it processes auxv records.
|
||||
_ [CacheLineSize]byte
|
||||
}
|
||||
|
||||
// initialize examines the processor and sets the relevant variables above.
|
||||
// This is called by the runtime package early in program initialization,
|
||||
// before normal init functions are run. env is set by runtime on Linux and Darwin
|
||||
// if go was compiled with GOEXPERIMENT=debugcpu.
|
||||
func init() {
|
||||
doinit()
|
||||
processOptions("")
|
||||
}
|
||||
|
||||
// options contains the cpu debug options that can be used in GODEBUGCPU.
|
||||
// Options are arch dependent and are added by the arch specific doinit functions.
|
||||
// Features that are mandatory for the specific GOARCH should not be added to options
|
||||
// (e.g. SSE2 on amd64).
|
||||
var options []option
|
||||
|
||||
// option ties the name of a cpu debug option to the feature flag it controls.
// Option names should be lower case. e.g. avx instead of AVX.
type option struct {
	Name    string // name compared against the key of an option field
	Feature *bool  // flag that is cleared when the option is turned off
}
|
||||
|
||||
// processOptions disables CPU feature values based on the parsed env string.
|
||||
// The env string is expected to be of the form feature1=0,feature2=0...
|
||||
// where feature names is one of the architecture specifc list stored in the
|
||||
// cpu packages options variable. If env contains all=0 then all capabilities
|
||||
// referenced through the options variable are disabled. Other feature
|
||||
// names and values other than 0 are silently ignored.
|
||||
func processOptions(env string) {
|
||||
field:
|
||||
for env != "" {
|
||||
field := ""
|
||||
i := indexByte(env, ',')
|
||||
if i < 0 {
|
||||
field, env = env, ""
|
||||
} else {
|
||||
field, env = env[:i], env[i+1:]
|
||||
}
|
||||
i = indexByte(field, '=')
|
||||
if i < 0 {
|
||||
continue
|
||||
}
|
||||
key, value := field[:i], field[i+1:]
|
||||
|
||||
// Only allow turning off CPU features by specifying '0'.
|
||||
if value == "0" {
|
||||
if key == "all" {
|
||||
for _, v := range options {
|
||||
*v.Feature = false
|
||||
}
|
||||
return
|
||||
} else {
|
||||
for _, v := range options {
|
||||
if v.Name == key {
|
||||
*v.Feature = false
|
||||
continue field
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// indexByte returns the index of the first instance of c in s,
// or -1 if c is not present in s.
func indexByte(s string, c byte) int {
	pos := 0
	for pos < len(s) && s[pos] != c {
		pos++
	}
	if pos == len(s) {
		return -1
	}
	return pos
}
|
7
vendor/github.com/templexxx/cpu/cpu_386.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_386.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const GOARCH = "386"
|
7
vendor/github.com/templexxx/cpu/cpu_amd64.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_amd64.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const GOARCH = "amd64"
|
7
vendor/github.com/templexxx/cpu/cpu_amd64p32.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_amd64p32.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const GOARCH = "amd64p32"
|
7
vendor/github.com/templexxx/cpu/cpu_arm.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_arm.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 32
|
102
vendor/github.com/templexxx/cpu/cpu_arm64.go
generated
vendored
Normal file
102
vendor/github.com/templexxx/cpu/cpu_arm64.go
generated
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 64
|
||||
|
||||
// arm64 doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2.
|
||||
// These are linknamed in runtime/os_linux_arm64.go and are initialized by
|
||||
// archauxv().
|
||||
var hwcap uint
|
||||
var hwcap2 uint
|
||||
|
||||
// HWCAP/HWCAP2 bits. These are exposed by Linux.
// Each constant is the mask of a single bit tested against hwcap.
const (
	hwcap_FP       = 1 << 0
	hwcap_ASIMD    = 1 << 1
	hwcap_EVTSTRM  = 1 << 2
	hwcap_AES      = 1 << 3
	hwcap_PMULL    = 1 << 4
	hwcap_SHA1     = 1 << 5
	hwcap_SHA2     = 1 << 6
	hwcap_CRC32    = 1 << 7
	hwcap_ATOMICS  = 1 << 8
	hwcap_FPHP     = 1 << 9
	hwcap_ASIMDHP  = 1 << 10
	hwcap_CPUID    = 1 << 11
	hwcap_ASIMDRDM = 1 << 12
	hwcap_JSCVT    = 1 << 13
	hwcap_FCMA     = 1 << 14
	hwcap_LRCPC    = 1 << 15
	hwcap_DCPOP    = 1 << 16
	hwcap_SHA3     = 1 << 17
	hwcap_SM3      = 1 << 18
	hwcap_SM4      = 1 << 19
	hwcap_ASIMDDP  = 1 << 20
	hwcap_SHA512   = 1 << 21
	hwcap_SVE      = 1 << 22
	hwcap_ASIMDFHM = 1 << 23
)
|
||||
|
||||
func doinit() {
|
||||
options = []option{
|
||||
{"evtstrm", &ARM64.HasEVTSTRM},
|
||||
{"aes", &ARM64.HasAES},
|
||||
{"pmull", &ARM64.HasPMULL},
|
||||
{"sha1", &ARM64.HasSHA1},
|
||||
{"sha2", &ARM64.HasSHA2},
|
||||
{"crc32", &ARM64.HasCRC32},
|
||||
{"atomics", &ARM64.HasATOMICS},
|
||||
{"fphp", &ARM64.HasFPHP},
|
||||
{"asimdhp", &ARM64.HasASIMDHP},
|
||||
{"cpuid", &ARM64.HasCPUID},
|
||||
{"asimdrdm", &ARM64.HasASIMDRDM},
|
||||
{"jscvt", &ARM64.HasJSCVT},
|
||||
{"fcma", &ARM64.HasFCMA},
|
||||
{"lrcpc", &ARM64.HasLRCPC},
|
||||
{"dcpop", &ARM64.HasDCPOP},
|
||||
{"sha3", &ARM64.HasSHA3},
|
||||
{"sm3", &ARM64.HasSM3},
|
||||
{"sm4", &ARM64.HasSM4},
|
||||
{"asimddp", &ARM64.HasASIMDDP},
|
||||
{"sha512", &ARM64.HasSHA512},
|
||||
{"sve", &ARM64.HasSVE},
|
||||
{"asimdfhm", &ARM64.HasASIMDFHM},
|
||||
|
||||
// These capabilities should always be enabled on arm64:
|
||||
// {"fp", &ARM64.HasFP},
|
||||
// {"asimd", &ARM64.HasASIMD},
|
||||
}
|
||||
|
||||
// HWCAP feature bits
|
||||
ARM64.HasFP = isSet(hwcap, hwcap_FP)
|
||||
ARM64.HasASIMD = isSet(hwcap, hwcap_ASIMD)
|
||||
ARM64.HasEVTSTRM = isSet(hwcap, hwcap_EVTSTRM)
|
||||
ARM64.HasAES = isSet(hwcap, hwcap_AES)
|
||||
ARM64.HasPMULL = isSet(hwcap, hwcap_PMULL)
|
||||
ARM64.HasSHA1 = isSet(hwcap, hwcap_SHA1)
|
||||
ARM64.HasSHA2 = isSet(hwcap, hwcap_SHA2)
|
||||
ARM64.HasCRC32 = isSet(hwcap, hwcap_CRC32)
|
||||
ARM64.HasATOMICS = isSet(hwcap, hwcap_ATOMICS)
|
||||
ARM64.HasFPHP = isSet(hwcap, hwcap_FPHP)
|
||||
ARM64.HasASIMDHP = isSet(hwcap, hwcap_ASIMDHP)
|
||||
ARM64.HasCPUID = isSet(hwcap, hwcap_CPUID)
|
||||
ARM64.HasASIMDRDM = isSet(hwcap, hwcap_ASIMDRDM)
|
||||
ARM64.HasJSCVT = isSet(hwcap, hwcap_JSCVT)
|
||||
ARM64.HasFCMA = isSet(hwcap, hwcap_FCMA)
|
||||
ARM64.HasLRCPC = isSet(hwcap, hwcap_LRCPC)
|
||||
ARM64.HasDCPOP = isSet(hwcap, hwcap_DCPOP)
|
||||
ARM64.HasSHA3 = isSet(hwcap, hwcap_SHA3)
|
||||
ARM64.HasSM3 = isSet(hwcap, hwcap_SM3)
|
||||
ARM64.HasSM4 = isSet(hwcap, hwcap_SM4)
|
||||
ARM64.HasASIMDDP = isSet(hwcap, hwcap_ASIMDDP)
|
||||
ARM64.HasSHA512 = isSet(hwcap, hwcap_SHA512)
|
||||
ARM64.HasSVE = isSet(hwcap, hwcap_SVE)
|
||||
ARM64.HasASIMDFHM = isSet(hwcap, hwcap_ASIMDFHM)
|
||||
}
|
||||
|
||||
// isSet reports whether at least one of the bits of value is set in hwc.
func isSet(hwc uint, value uint) bool {
	masked := hwc & value
	return masked != 0
}
|
7
vendor/github.com/templexxx/cpu/cpu_mips.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_mips.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpu/cpu_mips64.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_mips64.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpu/cpu_mips64le.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_mips64le.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpu/cpu_mipsle.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_mipsle.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 32
|
16
vendor/github.com/templexxx/cpu/cpu_no_init.go
generated
vendored
Normal file
16
vendor/github.com/templexxx/cpu/cpu_no_init.go
generated
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !386
|
||||
// +build !amd64
|
||||
// +build !amd64p32
|
||||
// +build !arm64
|
||||
// +build !ppc64
|
||||
// +build !ppc64le
|
||||
// +build !s390x
|
||||
|
||||
package cpu
|
||||
|
||||
// doinit does nothing on architectures excluded by the build constraints of
// this file; no features are detected and no options are registered.
func doinit() {}
|
68
vendor/github.com/templexxx/cpu/cpu_ppc64x.go
generated
vendored
Normal file
68
vendor/github.com/templexxx/cpu/cpu_ppc64x.go
generated
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ppc64 ppc64le
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 128
|
||||
|
||||
// ppc64x doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2.
|
||||
// These are linknamed in runtime/os_linux_ppc64x.go and are initialized by
|
||||
// archauxv().
|
||||
var hwcap uint
|
||||
var hwcap2 uint
|
||||
|
||||
// HWCAP/HWCAP2 bits. These are exposed by the kernel.
// The _PPC_FEATURE_ masks are tested against hwcap and the _PPC_FEATURE2_
// masks against hwcap2.
const (
	// ISA Level
	_PPC_FEATURE2_ARCH_2_07 = 0x80000000
	_PPC_FEATURE2_ARCH_3_00 = 0x00800000

	// CPU features
	_PPC_FEATURE_HAS_ALTIVEC     = 0x10000000
	_PPC_FEATURE_HAS_DFP         = 0x00000400
	_PPC_FEATURE_HAS_VSX         = 0x00000080
	_PPC_FEATURE2_HAS_HTM        = 0x40000000
	_PPC_FEATURE2_HAS_ISEL       = 0x08000000
	_PPC_FEATURE2_HAS_VEC_CRYPTO = 0x02000000
	_PPC_FEATURE2_HTM_NOSC       = 0x01000000
	_PPC_FEATURE2_DARN           = 0x00200000
	_PPC_FEATURE2_SCV            = 0x00100000
)
|
||||
|
||||
func doinit() {
|
||||
options = []option{
|
||||
{"htm", &PPC64.HasHTM},
|
||||
{"htmnosc", &PPC64.HasHTMNOSC},
|
||||
{"darn", &PPC64.HasDARN},
|
||||
{"scv", &PPC64.HasSCV},
|
||||
|
||||
// These capabilities should always be enabled on ppc64 and ppc64le:
|
||||
// {"vmx", &PPC64.HasVMX},
|
||||
// {"dfp", &PPC64.HasDFP},
|
||||
// {"vsx", &PPC64.HasVSX},
|
||||
// {"isel", &PPC64.HasISEL},
|
||||
// {"vcrypto", &PPC64.HasVCRYPTO},
|
||||
}
|
||||
|
||||
// HWCAP feature bits
|
||||
PPC64.HasVMX = isSet(hwcap, _PPC_FEATURE_HAS_ALTIVEC)
|
||||
PPC64.HasDFP = isSet(hwcap, _PPC_FEATURE_HAS_DFP)
|
||||
PPC64.HasVSX = isSet(hwcap, _PPC_FEATURE_HAS_VSX)
|
||||
|
||||
// HWCAP2 feature bits
|
||||
PPC64.IsPOWER8 = isSet(hwcap2, _PPC_FEATURE2_ARCH_2_07)
|
||||
PPC64.HasHTM = isSet(hwcap2, _PPC_FEATURE2_HAS_HTM)
|
||||
PPC64.HasISEL = isSet(hwcap2, _PPC_FEATURE2_HAS_ISEL)
|
||||
PPC64.HasVCRYPTO = isSet(hwcap2, _PPC_FEATURE2_HAS_VEC_CRYPTO)
|
||||
PPC64.HasHTMNOSC = isSet(hwcap2, _PPC_FEATURE2_HTM_NOSC)
|
||||
PPC64.IsPOWER9 = isSet(hwcap2, _PPC_FEATURE2_ARCH_3_00)
|
||||
PPC64.HasDARN = isSet(hwcap2, _PPC_FEATURE2_DARN)
|
||||
PPC64.HasSCV = isSet(hwcap2, _PPC_FEATURE2_SCV)
|
||||
}
|
||||
|
||||
// isSet reports whether at least one of the bits of value is set in hwc.
func isSet(hwc uint, value uint) bool {
	masked := hwc & value
	return masked != 0
}
|
153
vendor/github.com/templexxx/cpu/cpu_s390x.go
generated
vendored
Normal file
153
vendor/github.com/templexxx/cpu/cpu_s390x.go
generated
vendored
Normal file
@@ -0,0 +1,153 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 256
|
||||
|
||||
// bitIsSet reports whether the bit at index is set. The bit index
// is in big endian order, so bit index 0 is the leftmost bit.
// It panics if index lies beyond the last word of bits.
func bitIsSet(bits []uint64, index uint) bool {
	word := bits[index/64]
	// Bit 0 of a word is its most significant bit.
	mask := uint64(1) << (63 - index%64)
	return word&mask != 0
}
|
||||
|
||||
// function is the function code for the named function.
type function uint8

// Function codes tested against a queryResult. Every constant carries the
// function type so that it cannot be mixed up with a facility bit index.
const (
	// KM{,A,C,CTR} function codes
	aes128 function = 18 // AES-128
	aes192 function = 19 // AES-192
	aes256 function = 20 // AES-256

	// K{I,L}MD function codes
	sha1   function = 1 // SHA-1
	sha256 function = 2 // SHA-256
	sha512 function = 3 // SHA-512

	// KLMD function codes
	ghash function = 65 // GHASH
)
|
||||
|
||||
// queryResult contains the result of a Query function
|
||||
// call. Bits are numbered in big endian order so the
|
||||
// leftmost bit (the MSB) is at index 0.
|
||||
type queryResult struct {
|
||||
bits [2]uint64
|
||||
}
|
||||
|
||||
// Has reports whether the given functions are present.
|
||||
func (q *queryResult) Has(fns ...function) bool {
|
||||
if len(fns) == 0 {
|
||||
panic("no function codes provided")
|
||||
}
|
||||
for _, f := range fns {
|
||||
if !bitIsSet(q.bits[:], uint(f)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// facility is a bit index for the named facility.
type facility uint8

// Facility bit indices tested against a facilityList. Every constant carries
// the facility type so that it cannot be mixed up with a function code.
const (
	// mandatory facilities
	zarch  facility = 1  // z architecture mode is active
	stflef facility = 7  // store-facility-list-extended
	ldisp  facility = 18 // long-displacement
	eimm   facility = 21 // extended-immediate

	// miscellaneous facilities
	dfp    facility = 42 // decimal-floating-point
	etf3eh facility = 30 // extended-translation 3 enhancement

	// cryptography facilities
	msa  facility = 17  // message-security-assist
	msa3 facility = 76  // message-security-assist extension 3
	msa4 facility = 77  // message-security-assist extension 4
	msa5 facility = 57  // message-security-assist extension 5
	msa8 facility = 146 // message-security-assist extension 8

	// Note: vx and highgprs are excluded because they require
	// kernel support and so must be fetched from HWCAP.
)
|
||||
|
||||
// facilityList contains the result of an STFLE call.
|
||||
// Bits are numbered in big endian order so the
|
||||
// leftmost bit (the MSB) is at index 0.
|
||||
type facilityList struct {
|
||||
bits [4]uint64
|
||||
}
|
||||
|
||||
// Has reports whether the given facilities are present.
|
||||
func (s *facilityList) Has(fs ...facility) bool {
|
||||
if len(fs) == 0 {
|
||||
panic("no facility bits provided")
|
||||
}
|
||||
for _, f := range fs {
|
||||
if !bitIsSet(s.bits[:], uint(f)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// The following feature detection functions are defined in cpu_s390x.s.
|
||||
// They are likely to be expensive to call so the results should be cached.
|
||||
func stfle() facilityList
|
||||
func kmQuery() queryResult
|
||||
func kmcQuery() queryResult
|
||||
func kmctrQuery() queryResult
|
||||
func kmaQuery() queryResult
|
||||
func kimdQuery() queryResult
|
||||
func klmdQuery() queryResult
|
||||
|
||||
func doinit() {
|
||||
options = []option{
|
||||
{"zarch", &S390X.HasZArch},
|
||||
{"stfle", &S390X.HasSTFLE},
|
||||
{"ldisp", &S390X.HasLDisp},
|
||||
{"msa", &S390X.HasMSA},
|
||||
{"eimm", &S390X.HasEImm},
|
||||
{"dfp", &S390X.HasDFP},
|
||||
{"etf3eh", &S390X.HasETF3Enhanced},
|
||||
{"vx", &S390X.HasVX},
|
||||
}
|
||||
|
||||
aes := []function{aes128, aes192, aes256}
|
||||
facilities := stfle()
|
||||
|
||||
S390X.HasZArch = facilities.Has(zarch)
|
||||
S390X.HasSTFLE = facilities.Has(stflef)
|
||||
S390X.HasLDisp = facilities.Has(ldisp)
|
||||
S390X.HasEImm = facilities.Has(eimm)
|
||||
S390X.HasDFP = facilities.Has(dfp)
|
||||
S390X.HasETF3Enhanced = facilities.Has(etf3eh)
|
||||
S390X.HasMSA = facilities.Has(msa)
|
||||
|
||||
if S390X.HasMSA {
|
||||
// cipher message
|
||||
km, kmc := kmQuery(), kmcQuery()
|
||||
S390X.HasAES = km.Has(aes...)
|
||||
S390X.HasAESCBC = kmc.Has(aes...)
|
||||
if facilities.Has(msa4) {
|
||||
kmctr := kmctrQuery()
|
||||
S390X.HasAESCTR = kmctr.Has(aes...)
|
||||
}
|
||||
if facilities.Has(msa8) {
|
||||
kma := kmaQuery()
|
||||
S390X.HasAESGCM = kma.Has(aes...)
|
||||
}
|
||||
|
||||
// compute message digest
|
||||
kimd := kimdQuery() // intermediate (no padding)
|
||||
klmd := klmdQuery() // last (padding)
|
||||
S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1)
|
||||
S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256)
|
||||
S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512)
|
||||
S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
|
||||
}
|
||||
}
|
55
vendor/github.com/templexxx/cpu/cpu_s390x.s
generated
vendored
Normal file
55
vendor/github.com/templexxx/cpu/cpu_s390x.s
generated
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func stfle() facilityList
TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32
	MOVD $ret+0(FP), R1
	MOVD $3, R0          // last doubleword index to store
	XC   $32, (R1), (R1) // clear 4 doublewords (32 bytes)
	WORD $0xb2b01000     // store facility list extended (STFLE)
	RET
|
||||
|
||||
// func kmQuery() queryResult
TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16
	MOVD $0, R0         // set function code to 0 (KM-Query)
	MOVD $ret+0(FP), R1 // address of 16-byte return value
	WORD $0xB92E0024    // cipher message (KM)
	RET
|
||||
|
||||
// func kmcQuery() queryResult
TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16
	MOVD $0, R0         // set function code to 0 (KMC-Query)
	MOVD $ret+0(FP), R1 // address of 16-byte return value
	WORD $0xB92F0024    // cipher message with chaining (KMC)
	RET
|
||||
|
||||
// func kmctrQuery() queryResult
TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16
	MOVD $0, R0         // set function code to 0 (KMCTR-Query)
	MOVD $ret+0(FP), R1 // address of 16-byte return value
	WORD $0xB92D4024    // cipher message with counter (KMCTR)
	RET
|
||||
|
||||
// func kmaQuery() queryResult
TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16
	MOVD $0, R0         // set function code to 0 (KMA-Query)
	MOVD $ret+0(FP), R1 // address of 16-byte return value
	WORD $0xb9296024    // cipher message with authentication (KMA)
	RET
|
||||
|
||||
// func kimdQuery() queryResult
TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16
	MOVD $0, R0         // set function code to 0 (KIMD-Query)
	MOVD $ret+0(FP), R1 // address of 16-byte return value
	WORD $0xB93E0024    // compute intermediate message digest (KIMD)
	RET
|
||||
|
||||
// func klmdQuery() queryResult
TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16
	MOVD $0, R0         // set function code to 0 (KLMD-Query)
	MOVD $ret+0(FP), R1 // address of 16-byte return value
	WORD $0xB93F0024    // compute last message digest (KLMD)
	RET
|
7
vendor/github.com/templexxx/cpu/cpu_wasm.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpu/cpu_wasm.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
const CacheLineSize = 64
|
425
vendor/github.com/templexxx/cpu/cpu_x86.go
generated
vendored
Normal file
425
vendor/github.com/templexxx/cpu/cpu_x86.go
generated
vendored
Normal file
@@ -0,0 +1,425 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const CacheLineSize = 64
|
||||
|
||||
// cpuid is implemented in cpu_x86.s.
// eaxArg and ecxArg are the inputs of the query; the four results are
// returned as eax, ebx, ecx and edx.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)

// xgetbv with ecx = 0 is implemented in cpu_x86.s.
func xgetbv() (eax, edx uint32)
|
||||
|
||||
// Feature bit masks tested against the registers returned by cpuid.
const (
	// edx bits (cpuid leaf 1)
	cpuid_SSE2 = 1 << 26

	// ecx bits (cpuid leaf 1)
	cpuid_SSE3      = 1 << 0
	cpuid_PCLMULQDQ = 1 << 1
	cpuid_SSSE3     = 1 << 9
	cpuid_FMA       = 1 << 12
	cpuid_SSE41     = 1 << 19
	cpuid_SSE42     = 1 << 20
	cpuid_POPCNT    = 1 << 23
	cpuid_AES       = 1 << 25
	cpuid_OSXSAVE   = 1 << 27
	cpuid_AVX       = 1 << 28

	// ebx bits (cpuid leaf 7, sub-leaf 0)
	cpuid_BMI1     = 1 << 3
	cpuid_AVX2     = 1 << 5
	cpuid_BMI2     = 1 << 8
	cpuid_ERMS     = 1 << 9
	cpuid_ADX      = 1 << 19
	cpuid_AVX512F  = 1 << 16
	cpuid_AVX512DQ = 1 << 17
	cpuid_AVX512BW = 1 << 30
	cpuid_AVX512VL = 1 << 31

	// edx bits (CPUID.80000007H:EDX[8] per the Intel SDM; the query that
	// uses this mask is not visible in this part of the file)
	cpuid_Invariant_TSC = 1 << 8
)
|
||||
|
||||
func doinit() {
|
||||
options = []option{
|
||||
{"adx", &X86.HasADX},
|
||||
{"aes", &X86.HasAES},
|
||||
{"avx", &X86.HasAVX},
|
||||
{"avx2", &X86.HasAVX2},
|
||||
{"bmi1", &X86.HasBMI1},
|
||||
{"bmi2", &X86.HasBMI2},
|
||||
{"erms", &X86.HasERMS},
|
||||
{"fma", &X86.HasFMA},
|
||||
{"pclmulqdq", &X86.HasPCLMULQDQ},
|
||||
{"popcnt", &X86.HasPOPCNT},
|
||||
{"sse3", &X86.HasSSE3},
|
||||
{"sse41", &X86.HasSSE41},
|
||||
{"sse42", &X86.HasSSE42},
|
||||
{"ssse3", &X86.HasSSSE3},
|
||||
{"avx512f", &X86.HasAVX512F},
|
||||
{"avx512dq", &X86.HasAVX512DQ},
|
||||
{"avx512bw", &X86.HasAVX512BW},
|
||||
{"avx512vl", &X86.HasAVX512VL},
|
||||
{"invariant_tsc", &X86.HasInvariantTSC},
|
||||
|
||||
// sse2 set as last element so it can easily be removed again. See code below.
|
||||
{"sse2", &X86.HasSSE2},
|
||||
}
|
||||
|
||||
// Remove sse2 from options on amd64(p32) because SSE2 is a mandatory feature for these GOARCHs.
|
||||
if GOARCH == "amd64" || GOARCH == "amd64p32" {
|
||||
options = options[:len(options)-1]
|
||||
}
|
||||
|
||||
maxID, _, _, _ := cpuid(0, 0)
|
||||
|
||||
if maxID < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
_, _, ecx1, edx1 := cpuid(1, 0)
|
||||
X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
|
||||
|
||||
X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
|
||||
X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
|
||||
X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
|
||||
X86.HasFMA = isSet(ecx1, cpuid_FMA)
|
||||
X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
|
||||
X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
|
||||
X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
|
||||
X86.HasAES = isSet(ecx1, cpuid_AES)
|
||||
X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
|
||||
|
||||
osSupportsAVX := false
|
||||
osSupportsAVX512 := false
|
||||
// For XGETBV, OSXSAVE bit is required and sufficient.
|
||||
if X86.HasOSXSAVE {
|
||||
eax, _ := xgetbv()
|
||||
// Check if XMM and YMM registers have OS support.
|
||||
osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
|
||||
// Check is ZMM registers have OS support.
|
||||
osSupportsAVX512 = isSet(eax>>5, 7) && isSet(eax>>1, 3)
|
||||
}
|
||||
|
||||
X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
|
||||
|
||||
if maxID < 7 {
|
||||
return
|
||||
}
|
||||
|
||||
_, ebx7, _, _ := cpuid(7, 0)
|
||||
X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
|
||||
X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
|
||||
X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512
|
||||
X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ) && osSupportsAVX512
|
||||
X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW) && osSupportsAVX512
|
||||
X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL) && osSupportsAVX512
|
||||
X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
|
||||
X86.HasERMS = isSet(ebx7, cpuid_ERMS)
|
||||
X86.HasADX = isSet(ebx7, cpuid_ADX)
|
||||
|
||||
X86.Cache = getCacheSize()
|
||||
|
||||
X86.HasInvariantTSC = hasInvariantTSC()
|
||||
|
||||
X86.Family, X86.Model = getFamilyModel()
|
||||
|
||||
X86.Signature = makeSignature(X86.Family, X86.Model)
|
||||
|
||||
X86.Name = getName()
|
||||
|
||||
X86.TSCFrequency = getNativeTSCFrequency(X86.Name, X86.Signature)
|
||||
}
|
||||
|
||||
// isSet reports whether hwc and value share at least one set bit.
// With a multi-bit value this is an "any bit" test, not an "all bits" test.
func isSet(hwc uint32, value uint32) bool {
	common := hwc & value
	return common != 0
}
|
||||
|
||||
func hasInvariantTSC() bool {
|
||||
if maxExtendedFunction() < 0x80000007 {
|
||||
return false
|
||||
}
|
||||
_, _, _, edx := cpuid(0x80000007, 0)
|
||||
return isSet(edx, cpuid_Invariant_TSC)
|
||||
}
|
||||
|
||||
func getName() string {
|
||||
if maxExtendedFunction() >= 0x80000004 {
|
||||
v := make([]uint32, 0, 48)
|
||||
for i := uint32(0); i < 3; i++ {
|
||||
a, b, c, d := cpuid(0x80000002+i, 0)
|
||||
v = append(v, a, b, c, d)
|
||||
}
|
||||
return strings.Trim(string(valAsString(v...)), " ")
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// getNativeTSCFrequency gets TSC frequency from CPUID,
|
||||
// only supports Intel (Skylake or later microarchitecture) & key information is from Intel manual & kernel codes
|
||||
// (especially this commit: https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684).
|
||||
func getNativeTSCFrequency(name, sign string) uint64 {
|
||||
|
||||
if vendorID() != Intel {
|
||||
return 0
|
||||
}
|
||||
|
||||
if maxFunctionID() < 0x15 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// ApolloLake, GeminiLake, CannonLake (and presumably all new chipsets
|
||||
// from this point) report the crystal frequency directly via CPUID.0x15.
|
||||
// That's definitive data that we can rely upon.
|
||||
eax, ebx, ecx, _ := cpuid(0x15, 0)
|
||||
|
||||
// If ebx is 0, the TSC/”core crystal clock” ratio is not enumerated.
|
||||
// We won't provide TSC frequency detection in this situation.
|
||||
if eax == 0 || ebx == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Skylake, Kabylake and all variants of those two chipsets report a
|
||||
// crystal frequency of zero.
|
||||
if ecx == 0 { // Crystal clock frequency is not enumerated.
|
||||
ecx = getCrystalClockFrequency(sign)
|
||||
}
|
||||
|
||||
// TSC frequency = “core crystal clock frequency” * EBX/EAX.
|
||||
return uint64(ecx) * (uint64(ebx) / uint64(eax))
|
||||
}
|
||||
|
||||
// Processor signatures, written as DisplayFamily_DisplayModel in hexadecimal
// with a trailing "H", for the Intel processors that
// getCrystalClockFrequency has a hardcoded crystal frequency for.
//
// Copied from: CPUID Signature values of DisplayFamily and DisplayModel,
// in Intel® 64 and IA-32 Architectures Software Developer’s Manual
// Volume 4: Model-Specific Registers
// & https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/intel-family.h
const (
	IntelFam6SkylakeL  = "06_4EH"
	IntelFam6Skylake   = "06_5EH"
	IntelFam6SkylakeX  = "06_55H"
	IntelFam6KabylakeL = "06_8EH"
	IntelFam6Kabylake  = "06_9EH"
)
|
||||
|
||||
// getCrystalClockFrequency gets crystal clock frequency
|
||||
// for Intel processors in which CPUID.15H.EBX[31:0] ÷ CPUID.0x15.EAX[31:0] is enumerated
|
||||
// but CPUID.15H.ECX is not enumerated using this function to get nominal core crystal clock frequency.
|
||||
//
|
||||
// Actually these crystal clock frequencies provided by Intel hardcoded tables are not so accurate in some cases,
|
||||
// e.g. SkyLake server CPU may have issue (All SKX subject the crystal to an EMI reduction circuit that
|
||||
//reduces its actual frequency by (approximately) -0.25%):
|
||||
// see https://lore.kernel.org/lkml/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com/
|
||||
// for more details.
|
||||
// With this report, I set a coefficient (0.9975) for IntelFam6SkyLakeX.
|
||||
//
|
||||
// Unlike the kernel way (mentioned in https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684),
|
||||
// I prefer the Intel hardcoded tables,
|
||||
// because after some testing (comparing with wall clock, see https://github.com/templexxx/tsc/tsc_test.go for more details),
|
||||
// I found hardcoded tables are more accurate.
|
||||
func getCrystalClockFrequency(sign string) uint32 {
|
||||
|
||||
if maxFunctionID() < 0x16 {
|
||||
return 0
|
||||
}
|
||||
|
||||
switch sign {
|
||||
case IntelFam6SkylakeL:
|
||||
return 24 * 1000 * 1000
|
||||
case IntelFam6Skylake:
|
||||
return 24 * 1000 * 1000
|
||||
case IntelFam6SkylakeX:
|
||||
return 25 * 1000 * 1000 * 0.9975
|
||||
case IntelFam6KabylakeL:
|
||||
return 24 * 1000 * 1000
|
||||
case IntelFam6Kabylake:
|
||||
return 24 * 1000 * 1000
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func getFamilyModel() (uint32, uint32) {
|
||||
if maxFunctionID() < 0x1 {
|
||||
return 0, 0
|
||||
}
|
||||
eax, _, _, _ := cpuid(1, 0)
|
||||
family := (eax >> 8) & 0xf
|
||||
displayFamily := family
|
||||
if family == 0xf {
|
||||
displayFamily = ((eax >> 20) & 0xff) + family
|
||||
}
|
||||
model := (eax >> 4) & 0xf
|
||||
displayModel := model
|
||||
if family == 0x6 || family == 0xf {
|
||||
displayModel = ((eax >> 12) & 0xf0) + model
|
||||
}
|
||||
return displayFamily, displayModel
|
||||
}
|
||||
|
||||
// makeSignature formats family and model as a processor signature of the
// form XX_XXH: each value in upper-case hexadecimal, zero-padded to at least
// two digits, joined by "_" and followed by "H" (for example "06_4EH").
func makeSignature(family, model uint32) string {
	// %02X pads to two digits without ever dropping a digit, so models below
	// 0x10 keep their leading zero ("06_05H", not "06_5H").
	return fmt.Sprintf("%02X_%02XH", family, model)
}
|
||||
|
||||
// getCacheSize is from
|
||||
// https://github.com/klauspost/cpuid/blob/5a626f7029c910cc8329dae5405ee4f65034bce5/cpuid.go#L723
|
||||
func getCacheSize() Cache {
|
||||
c := Cache{
|
||||
L1I: -1,
|
||||
L1D: -1,
|
||||
L2: -1,
|
||||
L3: -1,
|
||||
}
|
||||
|
||||
vendor := vendorID()
|
||||
switch vendor {
|
||||
case Intel:
|
||||
if maxFunctionID() < 4 {
|
||||
return c
|
||||
}
|
||||
for i := uint32(0); ; i++ {
|
||||
eax, ebx, ecx, _ := cpuid(4, i)
|
||||
cacheType := eax & 15
|
||||
if cacheType == 0 {
|
||||
break
|
||||
}
|
||||
cacheLevel := (eax >> 5) & 7
|
||||
coherency := int(ebx&0xfff) + 1
|
||||
partitions := int((ebx>>12)&0x3ff) + 1
|
||||
associativity := int((ebx>>22)&0x3ff) + 1
|
||||
sets := int(ecx) + 1
|
||||
size := associativity * partitions * coherency * sets
|
||||
switch cacheLevel {
|
||||
case 1:
|
||||
if cacheType == 1 {
|
||||
// 1 = Data Cache
|
||||
c.L1D = size
|
||||
} else if cacheType == 2 {
|
||||
// 2 = Instruction Cache
|
||||
c.L1I = size
|
||||
} else {
|
||||
if c.L1D < 0 {
|
||||
c.L1I = size
|
||||
}
|
||||
if c.L1I < 0 {
|
||||
c.L1I = size
|
||||
}
|
||||
}
|
||||
case 2:
|
||||
c.L2 = size
|
||||
case 3:
|
||||
c.L3 = size
|
||||
}
|
||||
}
|
||||
case AMD, Hygon:
|
||||
// Untested.
|
||||
if maxExtendedFunction() < 0x80000005 {
|
||||
return c
|
||||
}
|
||||
_, _, ecx, edx := cpuid(0x80000005, 0)
|
||||
c.L1D = int(((ecx >> 24) & 0xFF) * 1024)
|
||||
c.L1I = int(((edx >> 24) & 0xFF) * 1024)
|
||||
|
||||
if maxExtendedFunction() < 0x80000006 {
|
||||
return c
|
||||
}
|
||||
_, _, ecx, _ = cpuid(0x80000006, 0)
|
||||
c.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func maxFunctionID() uint32 {
|
||||
a, _, _, _ := cpuid(0, 0)
|
||||
return a
|
||||
}
|
||||
|
||||
func maxExtendedFunction() uint32 {
|
||||
eax, _, _, _ := cpuid(0x80000000, 0)
|
||||
return eax
|
||||
}
|
||||
|
||||
// Vendor identifiers returned by vendorID. Other is the zero value and is
// returned for vendor strings that are not present in vendorMapping.
const (
	Other = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
)
|
||||
|
||||
// vendorMapping maps the 12-character vendor string assembled by vendorID
// from CPUID leaf 0 to one of the vendor identifiers above.
//
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]int{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
	"bhyve bhyve ": Bhyve,
	"HygonGenuine": Hygon,
}
|
||||
|
||||
func vendorID() int {
|
||||
_, b, c, d := cpuid(0, 0)
|
||||
v := valAsString(b, d, c)
|
||||
vend, ok := vendorMapping[string(v)]
|
||||
if !ok {
|
||||
return Other
|
||||
}
|
||||
return vend
|
||||
}
|
||||
|
||||
// valAsString lays out each value as four bytes, least significant byte
// first, and returns the bytes that precede the first zero byte. When no
// zero byte occurs, all 4*len(values) bytes are returned.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for idx, word := range values {
		base := idx * 4

		// Unpack the whole word before scanning it for a terminator.
		for k := 0; k < 4; k++ {
			out[base+k] = byte(word >> (8 * uint(k)))
		}
		for k := 0; k < 4; k++ {
			if out[base+k] == 0 {
				return out[:base+k]
			}
		}
	}
	return out
}
|
32
vendor/github.com/templexxx/cpu/cpu_x86.s
generated
vendored
Normal file
32
vendor/github.com/templexxx/cpu/cpu_x86.s
generated
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
//
// Loads eaxArg into AX and ecxArg into CX, executes CPUID, and stores the
// resulting AX, BX, CX and DX into the four return slots.
TEXT ·cpuid(SB), NOSPLIT, $0-24
	MOVL eaxArg+0(FP), AX
	MOVL ecxArg+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
|
||||
|
||||
// func xgetbv() (eax, edx uint32)
//
// Executes XGETBV with CX = 0 and stores AX and DX into the two return slots.
// When built with GOOS_nacl defined, both results are set to zero instead.
TEXT ·xgetbv(SB),NOSPLIT,$0-8
#ifdef GOOS_nacl
	// nacl does not support XGETBV.
	MOVL $0, eax+0(FP)
	MOVL $0, edx+4(FP)
#else
	MOVL $0, CX
	XGETBV
	MOVL AX, eax+0(FP)
	MOVL DX, edx+4(FP)
#endif
	RET
|
1
vendor/github.com/templexxx/xorsimd/.gitattributes
generated
vendored
Normal file
1
vendor/github.com/templexxx/xorsimd/.gitattributes
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*.s linguist-language=go:x
|
13
vendor/github.com/templexxx/xorsimd/.gitignore
generated
vendored
Normal file
13
vendor/github.com/templexxx/xorsimd/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.exe~
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
.idea
|
21
vendor/github.com/templexxx/xorsimd/LICENSE
generated
vendored
Normal file
21
vendor/github.com/templexxx/xorsimd/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Temple3x (temple3x@gmail.com)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
46
vendor/github.com/templexxx/xorsimd/README.md
generated
vendored
Normal file
46
vendor/github.com/templexxx/xorsimd/README.md
generated
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
# XOR SIMD
|
||||
|
||||
[![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8] [![Sourcegraph][9]][10]
|
||||
|
||||
[1]: https://godoc.org/github.com/templexxx/xorsimd?status.svg
|
||||
[2]: https://godoc.org/github.com/templexxx/xorsimd
|
||||
[3]: https://img.shields.io/badge/license-MIT-blue.svg
|
||||
[4]: LICENSE
|
||||
[5]: https://github.com/templexxx/xorsimd/workflows/unit-test/badge.svg
|
||||
[6]: https://github.com/templexxx/xorsimd
|
||||
[7]: https://goreportcard.com/badge/github.com/templexxx/xorsimd
|
||||
[8]: https://goreportcard.com/report/github.com/templexxx/xorsimd
|
||||
[9]: https://sourcegraph.com/github.com/templexxx/xorsimd/-/badge.svg
|
||||
[10]: https://sourcegraph.com/github.com/templexxx/xorsimd?badge
|
||||
|
||||
## Introduction:
|
||||
|
||||
>- XOR code engine in pure Go.
|
||||
>
|
||||
>- [High Performance](https://github.com/templexxx/xorsimd#performance):
|
||||
More than 270GB/s per physical core.
|
||||
|
||||
## Performance
|
||||
|
||||
Performance depends mainly on:
|
||||
|
||||
>- CPU instruction extension.
|
||||
>
|
||||
>- Number of source row vectors.
|
||||
|
||||
**Platform:**
|
||||
|
||||
*AWS c5d.xlarge (Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz)*
|
||||
|
||||
**All tests run on a single core.**
|
||||
|
||||
`I/O = (src_num + 1) * vector_size / cost`
|
||||
|
||||
| Src Num | Vector size | AVX512 I/O (MB/S) | AVX2 I/O (MB/S) |SSE2 I/O (MB/S) |
|
||||
|-------|-------------|-------------|---------------|---------------|
|
||||
|5|4KB| 270403.73 | 142825.25 | 74443.91 |
|
||||
|5|1MB| 26948.34 | 26887.37 | 26950.65 |
|
||||
|5|8MB| 17881.32 | 17212.56 | 16402.97 |
|
||||
|10|4KB| 190445.30 | 102953.59 | 53244.04 |
|
||||
|10|1MB| 26424.44 | 26618.65 | 26094.39 |
|
||||
|10|8MB| 15471.31 | 14866.72 | 13565.80 |
|
5
vendor/github.com/templexxx/xorsimd/go.mod
generated
vendored
Normal file
5
vendor/github.com/templexxx/xorsimd/go.mod
generated
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
module github.com/templexxx/xorsimd
|
||||
|
||||
require github.com/templexxx/cpu v0.0.1
|
||||
|
||||
go 1.13
|
2
vendor/github.com/templexxx/xorsimd/go.sum
generated
vendored
Normal file
2
vendor/github.com/templexxx/xorsimd/go.sum
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
github.com/templexxx/cpu v0.0.1 h1:hY4WdLOgKdc8y13EYklu9OUTXik80BkxHoWvTO6MQQY=
|
||||
github.com/templexxx/cpu v0.0.1/go.mod h1:w7Tb+7qgcAlIyX4NhLuDKt78AHA5SzPmq0Wj6HiEnnk=
|
89
vendor/github.com/templexxx/xorsimd/xor.go
generated
vendored
Normal file
89
vendor/github.com/templexxx/xorsimd/xor.go
generated
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
// Copyright (c) 2019. Temple3x (temple3x@gmail.com)
|
||||
//
|
||||
// Use of this source code is governed by the MIT License
|
||||
// that can be found in the LICENSE file.
|
||||
|
||||
package xorsimd
|
||||
|
||||
import "github.com/templexxx/cpu"
|
||||
|
||||
// EnableAVX512 may slow down CPU Clock (maybe not).
// TODO need more research:
// https://lemire.me/blog/2018/04/19/by-how-much-does-avx-512-slow-down-your-cpu-a-first-experiment/
//
// NOTE(review): cpuFeature below is computed once, when the package-level
// variables are initialized, and getCPUFeature is the only reader of
// EnableAVX512 visible here. Assigning EnableAVX512 later therefore does not
// appear to change the selected implementation — confirm the intent.
var EnableAVX512 = true

// cpuFeature indicates which instruction set will be used.
var cpuFeature = getCPUFeature()

// Identifiers for the available XOR implementations; getCPUFeature returns
// one of them and the result is stored in cpuFeature.
const (
	avx512 = iota
	avx2
	sse2
	generic
)
|
||||
|
||||
// TODO: Add ARM feature...
|
||||
func getCPUFeature() int {
|
||||
if hasAVX512() && EnableAVX512 {
|
||||
return avx512
|
||||
} else if cpu.X86.HasAVX2 {
|
||||
return avx2
|
||||
} else {
|
||||
return sse2 // amd64 must has sse2
|
||||
}
|
||||
}
|
||||
|
||||
func hasAVX512() (ok bool) {
|
||||
|
||||
return cpu.X86.HasAVX512VL &&
|
||||
cpu.X86.HasAVX512BW &&
|
||||
cpu.X86.HasAVX512F &&
|
||||
cpu.X86.HasAVX512DQ
|
||||
}
|
||||
|
||||
// Encode encodes elements from source slice into a
|
||||
// destination slice. The source and destination may overlap.
|
||||
// Encode returns the number of bytes encoded, which will be the minimum of
|
||||
// len(src[i]) and len(dst).
|
||||
func Encode(dst []byte, src [][]byte) (n int) {
|
||||
n = checkLen(dst, src)
|
||||
if n == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
dst = dst[:n]
|
||||
for i := range src {
|
||||
src[i] = src[i][:n]
|
||||
}
|
||||
|
||||
if len(src) == 1 {
|
||||
copy(dst, src[0])
|
||||
return
|
||||
}
|
||||
|
||||
encode(dst, src)
|
||||
return
|
||||
}
|
||||
|
||||
// checkLen returns the smallest length among dst and every element of src,
// or 0 when that length is not positive.
func checkLen(dst []byte, src [][]byte) int {
	shortest := len(dst)
	for _, s := range src {
		if l := len(s); l < shortest {
			shortest = l
		}
	}

	if shortest > 0 {
		return shortest
	}
	return 0
}
|
||||
|
||||
// Bytes XORs the bytes in a and b into a
|
||||
// destination slice. The source and destination may overlap.
|
||||
//
|
||||
// Bytes returns the number of bytes encoded, which will be the minimum of
|
||||
// len(dst), len(a), len(b).
|
||||
func Bytes(dst, a, b []byte) int {
|
||||
return Encode(dst, [][]byte{a, b})
|
||||
}
|
95
vendor/github.com/templexxx/xorsimd/xor_amd64.go
generated
vendored
Normal file
95
vendor/github.com/templexxx/xorsimd/xor_amd64.go
generated
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
// Copyright (c) 2019. Temple3x (temple3x@gmail.com)
|
||||
//
|
||||
// Use of this source code is governed by the MIT License
|
||||
// that can be found in the LICENSE file.
|
||||
|
||||
package xorsimd
|
||||
|
||||
func encode(dst []byte, src [][]byte) {
|
||||
|
||||
switch cpuFeature {
|
||||
case avx512:
|
||||
encodeAVX512(dst, src)
|
||||
case avx2:
|
||||
encodeAVX2(dst, src)
|
||||
default:
|
||||
encodeSSE2(dst, src)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Bytes8 XORs of 8 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 8,
|
||||
// if not, Bytes8 will panic.
|
||||
func Bytes8(dst, a, b []byte) {
|
||||
|
||||
bytes8(&dst[0], &a[0], &b[0])
|
||||
}
|
||||
|
||||
// Bytes16 XORs of packed 16 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 16,
|
||||
// if not, Bytes16 will panic.
|
||||
func Bytes16(dst, a, b []byte) {
|
||||
|
||||
bytes16(&dst[0], &a[0], &b[0])
|
||||
}
|
||||
|
||||
// Bytes8Align XORs of 8 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 8,
|
||||
// if not, Bytes8 will panic.
|
||||
func Bytes8Align(dst, a, b []byte) {
|
||||
|
||||
bytes8(&dst[0], &a[0], &b[0])
|
||||
}
|
||||
|
||||
// Bytes16Align XORs of packed 16 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 16,
|
||||
// if not, Bytes16 will panic.
|
||||
func Bytes16Align(dst, a, b []byte) {
|
||||
|
||||
bytes16(&dst[0], &a[0], &b[0])
|
||||
}
|
||||
|
||||
// BytesA XORs the len(a) bytes in a and b into a
|
||||
// destination slice.
|
||||
// The destination should have enough space.
|
||||
//
|
||||
// It's used for encoding small bytes slices (< dozens bytes),
|
||||
// and the slices may not be aligned to 8 bytes or 16 bytes.
|
||||
// If the length is big, it's better to use 'func Bytes(dst, a, b []byte)' instead
|
||||
// for gain better performance.
|
||||
func BytesA(dst, a, b []byte) {
|
||||
|
||||
bytesN(&dst[0], &a[0], &b[0], len(a))
|
||||
}
|
||||
|
||||
// BytesB XORs the len(b) bytes in a and b into a
|
||||
// destination slice.
|
||||
// The destination should have enough space.
|
||||
//
|
||||
// It's used for encoding small bytes slices (< dozens bytes),
|
||||
// and the slices may not be aligned to 8 bytes or 16 bytes.
|
||||
// If the length is big, it's better to use 'func Bytes(dst, a, b []byte)' instead
|
||||
// for gain better performance.
|
||||
func BytesB(dst, a, b []byte) {
|
||||
|
||||
bytesN(&dst[0], &a[0], &b[0], len(b))
|
||||
}
|
||||
|
||||
// encodeAVX512 is the encoder used by encode when cpuFeature is avx512.
// It has no Go body; the implementation is in xoravx512_amd64.s.
//
//go:noescape
func encodeAVX512(dst []byte, src [][]byte)

// encodeAVX2 is the encoder used by encode when cpuFeature is avx2.
// It has no Go body; the implementation is in xoravx2_amd64.s.
//
//go:noescape
func encodeAVX2(dst []byte, src [][]byte)

// encodeSSE2 is the encoder used by encode for every other cpuFeature value.
// It has no Go body; presumably implemented in an assembly file alongside
// the AVX2 and AVX-512 encoders.
//
//go:noescape
func encodeSSE2(dst []byte, src [][]byte)

// bytesN is called by BytesA and BytesB with the addresses of the first
// bytes of dst, a and b and a byte count n. It has no Go body.
// NOTE(review): presumably XORs n bytes of a and b into dst — confirm
// against the assembly.
//
//go:noescape
func bytesN(dst, a, b *byte, n int)

// bytes8 is called by Bytes8 and Bytes8Align with the addresses of the first
// bytes of dst, a and b. It has no Go body.
// NOTE(review): presumably XORs 8 bytes of a and b into dst — confirm
// against the assembly.
//
//go:noescape
func bytes8(dst, a, b *byte)

// bytes16 is called by Bytes16 and Bytes16Align with the addresses of the
// first bytes of dst, a and b. It has no Go body.
// NOTE(review): presumably XORs 16 bytes of a and b into dst — confirm
// against the assembly.
//
//go:noescape
func bytes16(dst, a, b *byte)
|
205
vendor/github.com/templexxx/xorsimd/xor_generic.go
generated
vendored
Normal file
205
vendor/github.com/templexxx/xorsimd/xor_generic.go
generated
vendored
Normal file
@@ -0,0 +1,205 @@
|
||||
// Copyright (c) 2019. Temple3x (temple3x@gmail.com)
|
||||
//
|
||||
// Use of this source code is governed by the MIT License
|
||||
// that can be found in the LICENSE file.
|
||||
//
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64
|
||||
|
||||
package xorsimd
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// wordSize is the size of a uintptr in bytes on the target platform.
const wordSize = int(unsafe.Sizeof(uintptr(0)))

// supportsUnaligned is true on the listed GOARCH values; the functions in
// this file use it to choose word-sized (uintptr) access to byte slices over
// the byte-by-byte path.
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
|
||||
|
||||
func encode(dst []byte, src [][]byte) {
|
||||
if supportsUnaligned {
|
||||
fastEncode(dst, src, len(dst))
|
||||
} else {
|
||||
// TODO(hanwen): if (dst, a, b) have common alignment
|
||||
// we could still try fastEncode. It is not clear
|
||||
// how often this happens, and it's only worth it if
|
||||
// the block encryption itself is hardware
|
||||
// accelerated.
|
||||
safeEncode(dst, src, len(dst))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// fastEncode xor in bulk. It only works on architectures that
|
||||
// support unaligned read/writes.
|
||||
func fastEncode(dst []byte, src [][]byte, n int) {
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
wordBytes := w * wordSize
|
||||
|
||||
wordAlignSrc := make([][]byte, len(src))
|
||||
for i := range src {
|
||||
wordAlignSrc[i] = src[i][:wordBytes]
|
||||
}
|
||||
fastEnc(dst[:wordBytes], wordAlignSrc)
|
||||
}
|
||||
|
||||
for i := n - n%wordSize; i < n; i++ {
|
||||
s := src[0][i]
|
||||
for j := 1; j < len(src); j++ {
|
||||
s ^= src[j][i]
|
||||
}
|
||||
dst[i] = s
|
||||
}
|
||||
}
|
||||
|
||||
func fastEnc(dst []byte, src [][]byte) {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
sw := make([][]uintptr, len(src))
|
||||
for i := range src {
|
||||
sw[i] = *(*[]uintptr)(unsafe.Pointer(&src[i]))
|
||||
}
|
||||
|
||||
n := len(dst) / wordSize
|
||||
for i := 0; i < n; i++ {
|
||||
s := sw[0][i]
|
||||
for j := 1; j < len(sw); j++ {
|
||||
s ^= sw[j][i]
|
||||
}
|
||||
dw[i] = s
|
||||
}
|
||||
}
|
||||
|
||||
// safeEncode XORs the first n bytes of every source into dst, one byte at a
// time. Bytes of dst at index n and beyond are left untouched.
func safeEncode(dst []byte, src [][]byte, n int) {
	for pos := 0; pos < n; pos++ {
		acc := src[0][pos]
		for _, s := range src[1:] {
			acc ^= s[pos]
		}
		dst[pos] = acc
	}
}
|
||||
|
||||
// Bytes8 XORs of word 8 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 8,
|
||||
// if not, Bytes8 will panic.
|
||||
func Bytes8(dst, a, b []byte) {
|
||||
|
||||
bytesWords(dst[:8], a[:8], b[:8])
|
||||
}
|
||||
|
||||
// Bytes16 XORs of packed doubleword 16 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 16,
|
||||
// if not, Bytes16 will panic.
|
||||
func Bytes16(dst, a, b []byte) {
|
||||
|
||||
bytesWords(dst[:16], a[:16], b[:16])
|
||||
}
|
||||
|
||||
// bytesWords XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||
// The slice arguments a and b are assumed to be of equal length.
|
||||
func bytesWords(dst, a, b []byte) {
|
||||
if supportsUnaligned {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
n := len(b) / wordSize
|
||||
for i := 0; i < n; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
} else {
|
||||
n := len(b)
|
||||
for i := 0; i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bytes8Align XORs of 8 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 8,
|
||||
// if not, Bytes8 will panic.
|
||||
//
|
||||
// All the byte slices must be aligned to wordsize.
|
||||
func Bytes8Align(dst, a, b []byte) {
|
||||
|
||||
bytesWordsAlign(dst[:8], a[:8], b[:8])
|
||||
}
|
||||
|
||||
// Bytes16Align XORs of packed 16 Bytes.
|
||||
// The slice arguments a, b, dst's lengths are assumed to be at least 16,
|
||||
// if not, Bytes16 will panic.
|
||||
//
|
||||
// All the byte slices must be aligned to wordsize.
|
||||
func Bytes16Align(dst, a, b []byte) {
|
||||
|
||||
bytesWordsAlign(dst[:16], a[:16], b[:16])
|
||||
}
|
||||
|
||||
// bytesWordsAlign XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||
// The slice arguments a and b are assumed to be of equal length.
|
||||
//
|
||||
// All the byte slices must be aligned to wordsize.
|
||||
func bytesWordsAlign(dst, a, b []byte) {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
n := len(b) / wordSize
|
||||
for i := 0; i < n; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
}
|
||||
|
||||
// BytesA XORs the len(a) bytes in a and b into a
|
||||
// destination slice.
|
||||
// The destination should have enough space.
|
||||
//
|
||||
// It's used for encoding small bytes slices (< dozens bytes),
|
||||
// and the slices may not be aligned to 8 bytes or 16 bytes.
|
||||
// If the length is big, it's better to use 'func Bytes(dst, a, b []byte)' instead
|
||||
// for gain better performance.
|
||||
func BytesA(dst, a, b []byte) {
|
||||
|
||||
n := len(a)
|
||||
bytesN(dst[:n], a[:n], b[:n], n)
|
||||
}
|
||||
|
||||
// BytesB XORs the len(b) bytes in a and b into a
|
||||
// destination slice.
|
||||
// The destination should have enough space.
|
||||
//
|
||||
// It's used for encoding small bytes slices (< dozens bytes),
|
||||
// and the slices may not be aligned to 8 bytes or 16 bytes.
|
||||
// If the length is big, it's better to use 'func Bytes(dst, a, b []byte)' instead
|
||||
// for gain better performance.
|
||||
func BytesB(dst, a, b []byte) {
|
||||
|
||||
n := len(b)
|
||||
bytesN(dst[:n], a[:n], b[:n], n)
|
||||
}
|
||||
|
||||
func bytesN(dst, a, b []byte, n int) {
|
||||
|
||||
switch {
|
||||
case supportsUnaligned:
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
for i := 0; i < w; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
}
|
||||
|
||||
for i := (n - n%wordSize); i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
default:
|
||||
for i := 0; i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
}
|
||||
}
|
124
vendor/github.com/templexxx/xorsimd/xoravx2_amd64.s
generated
vendored
Normal file
124
vendor/github.com/templexxx/xorsimd/xoravx2_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
// Copyright (c) 2019. Temple3x (temple3x@gmail.com)
|
||||
//
|
||||
// Use of this source code is governed by the MIT License
|
||||
// that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define dst BX // parity's address
#define d2src SI // two-dimension src_slice's address
#define csrc CX // cnt of src
#define len DX // len of vect
#define pos R8 // job position in vect

#define csrc_tmp R9
#define d2src_off R10
#define src_tmp R11
#define not_aligned_len R12
#define src_val0 R13
#define src_val1 R14

// func encodeAVX2(dst []byte, src [][]byte)
//
// XORs the source vectors into dst over len(dst) bytes.
//
// Every inner loop loads the first source and then XORs in at least one more
// before testing its counter, so at least two sources are required.
//
// Flow:
//   - len is a multiple of 128: the 128-byte loop runs from pos 0 up to len.
//   - len is not a multiple of 8: loop_1b handles single bytes from the end
//     until len is a multiple of 8.
//   - len is a multiple of 8 but not of 128: loop_8b handles 8-byte words
//     from the end for len&127 bytes; the 128-byte loop then covers what is
//     left if that is at least 128 bytes.
//
// NOTE(review): the FP names used below (d, s, c, l) do not match the Go
// parameter names (dst, src), and the frame is declared as $0 without an
// argument size; go vet's asmdecl check would presumably flag both.
TEXT ·encodeAVX2(SB), NOSPLIT, $0
	MOVQ d+0(FP), dst    // dst data pointer
	MOVQ s+24(FP), d2src // src data pointer (array of slice headers)
	MOVQ c+32(FP), csrc  // len(src)
	MOVQ l+8(FP), len    // len(dst)
	TESTQ $127, len
	JNZ not_aligned

aligned:
	MOVQ $0, pos

loop128b:
	MOVQ csrc, csrc_tmp // store src_cnt -> csrc_tmp
	SUBQ $2, csrc_tmp   // next_vect runs csrc-1 times: once, then while the decrement stays >= 0
	MOVQ $0, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp // get first src_vect's addr -> src_tmp
	VMOVDQU (src_tmp)(pos*1), Y0
	VMOVDQU 32(src_tmp)(pos*1), Y1
	VMOVDQU 64(src_tmp)(pos*1), Y2
	VMOVDQU 96(src_tmp)(pos*1), Y3

next_vect:
	ADDQ $24, d2src_off // advance to the next slice header (24 bytes each)
	MOVQ (d2src)(d2src_off*1), src_tmp // next data_vect
	VMOVDQU (src_tmp)(pos*1), Y4
	VMOVDQU 32(src_tmp)(pos*1), Y5
	VMOVDQU 64(src_tmp)(pos*1), Y6
	VMOVDQU 96(src_tmp)(pos*1), Y7
	VPXOR Y4, Y0, Y0
	VPXOR Y5, Y1, Y1
	VPXOR Y6, Y2, Y2
	VPXOR Y7, Y3, Y3
	SUBQ $1, csrc_tmp
	JGE next_vect

	// Store the accumulated 128 bytes.
	VMOVDQU Y0, (dst)(pos*1)
	VMOVDQU Y1, 32(dst)(pos*1)
	VMOVDQU Y2, 64(dst)(pos*1)
	VMOVDQU Y3, 96(dst)(pos*1)

	ADDQ $128, pos
	CMPQ len, pos
	JNE loop128b
	VZEROUPPER
	RET

	// loop_1b: XOR one byte per pass, taken from offset len-1, then shrink len.
loop_1b:
	MOVQ csrc, csrc_tmp
	MOVQ $0, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	SUBQ $2, csrc_tmp
	MOVB -1(src_tmp)(len*1), src_val0 // encode from the end of src

next_vect_1b:
	ADDQ $24, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	MOVB -1(src_tmp)(len*1), src_val1
	XORB src_val1, src_val0
	SUBQ $1, csrc_tmp
	JGE next_vect_1b

	MOVB src_val0, -1(dst)(len*1)
	SUBQ $1, len
	TESTQ $7, len
	JNZ loop_1b // repeat until len is a multiple of 8

	CMPQ len, $0
	JE ret
	TESTQ $127, len
	JZ aligned
	// Otherwise fall through: len is a multiple of 8 but not of 128.

not_aligned:
	TESTQ $7, len
	JNE loop_1b
	MOVQ len, not_aligned_len
	ANDQ $127, not_aligned_len // bytes to handle 8 at a time

	// loop_8b: XOR one 8-byte word per pass, taken from offset len-8, then shrink len.
loop_8b:
	MOVQ csrc, csrc_tmp
	MOVQ $0, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	SUBQ $2, csrc_tmp
	MOVQ -8(src_tmp)(len*1), src_val0

next_vect_8b:
	ADDQ $24, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	MOVQ -8(src_tmp)(len*1), src_val1
	XORQ src_val1, src_val0
	SUBQ $1, csrc_tmp
	JGE next_vect_8b

	MOVQ src_val0, -8(dst)(len*1)
	SUBQ $8, len
	SUBQ $8, not_aligned_len
	JG loop_8b

	// What is left is a multiple of 128; run the wide loop if there is any.
	CMPQ len, $128
	JGE aligned
	RET

ret:
	RET
|
124
vendor/github.com/templexxx/xorsimd/xoravx512_amd64.s
generated
vendored
Normal file
124
vendor/github.com/templexxx/xorsimd/xoravx512_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
// Copyright (c) 2019. Temple3x (temple3x@gmail.com)
|
||||
//
|
||||
// Use of this source code is governed by the MIT License
|
||||
// that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define dst BX // destination (parity) buffer address
|
||||
#define d2src SI // address of the two-dimensional src slice (array of slice headers)
|
||||
#define csrc CX // number of source vectors
|
||||
#define len DX // length of each vector in bytes (decremented while the tail is trimmed)
|
||||
#define pos R8 // current byte position inside the vectors in the wide loop
|
||||
|
||||
#define csrc_tmp R9 // per-position countdown of source vectors still to fold in (initialised from csrc)
|
||||
#define d2src_off R10 // byte offset into the slice-header array; advanced by 24 per source vector
|
||||
#define src_tmp R11 // base address of the source vector currently being read
|
||||
#define not_aligned_len R12 // tail bytes left for the 8-byte loop (len & 255)
|
||||
#define src_val0 R13 // XOR accumulator for the 1-byte and 8-byte scalar paths
|
||||
#define src_val1 R14 // value loaded from the next source vector in the scalar paths
|
||||
|
||||
// func encodeAVX512(dst []byte, src [][]byte)
//
// encodeAVX512 writes into dst the byte-wise XOR of every vector in src:
//
//	dst[i] = src[0][i] ^ src[1][i] ^ ... ^ src[len(src)-1][i]   for 0 <= i < len(dst)
//
// Processing order:
//   1. If len(dst) is not a multiple of 8, single bytes are folded from the
//      end of the vectors until the remaining length is a multiple of 8.
//   2. 8-byte words are then folded from the end until the remaining length
//      is a multiple of 256.
//   3. The remaining prefix is folded front-to-back, 256 bytes per iteration,
//      in four ZMM accumulators (Z0-Z3).
//
// Arguments (48 bytes: two 24-byte slice headers, no results):
//   dst_base+0(FP), dst_len+8(FP)   - destination slice
//   src_base+24(FP), src_len+32(FP) - slice of source slices
//
// Preconditions that are NOT checked here: len(src) >= 2 (every fold loop
// unconditionally reads a second slice header) and every src[j] holds at
// least len(dst) bytes. Presumably the Go caller guarantees both - verify.
//
// The FP symbol names use the _base/_len suffixes because dst and len are
// preprocessor macros in this file and would otherwise be expanded.
TEXT ·encodeAVX512(SB), NOSPLIT, $0-48
	MOVQ  dst_base+0(FP), dst
	MOVQ  src_base+24(FP), d2src
	MOVQ  src_len+32(FP), csrc
	MOVQ  dst_len+8(FP), len
	TESTQ len, len
	JZ    ret                    // empty dst: the 256-byte loop below would never reach pos == len
	TESTQ $255, len
	JNZ   not_aligned            // len is not a multiple of 256: trim the tail first

aligned:
	MOVQ $0, pos

loop256b:
	MOVQ     csrc, csrc_tmp                // store src_cnt -> csrc_tmp
	SUBQ     $2, csrc_tmp                  // first vector is loaded below; JGE after SUBQ $1 yields csrc-1 folds
	MOVQ     $0, d2src_off
	MOVQ     (d2src)(d2src_off*1), src_tmp // get first src_vect's addr -> src_tmp
	VMOVDQU8 (src_tmp)(pos*1), Z0
	VMOVDQU8 64(src_tmp)(pos*1), Z1
	VMOVDQU8 128(src_tmp)(pos*1), Z2
	VMOVDQU8 192(src_tmp)(pos*1), Z3

next_vect:
	ADDQ     $24, d2src_off                // len(slice) = 24
	MOVQ     (d2src)(d2src_off*1), src_tmp // next data_vect
	VMOVDQU8 (src_tmp)(pos*1), Z4
	VMOVDQU8 64(src_tmp)(pos*1), Z5
	VMOVDQU8 128(src_tmp)(pos*1), Z6
	VMOVDQU8 192(src_tmp)(pos*1), Z7
	VPXORQ   Z4, Z0, Z0
	VPXORQ   Z5, Z1, Z1
	VPXORQ   Z6, Z2, Z2
	VPXORQ   Z7, Z3, Z3
	SUBQ     $1, csrc_tmp
	JGE      next_vect

	VMOVDQU8 Z0, (dst)(pos*1)
	VMOVDQU8 Z1, 64(dst)(pos*1)
	VMOVDQU8 Z2, 128(dst)(pos*1)
	VMOVDQU8 Z3, 192(dst)(pos*1)

	ADDQ $256, pos
	CMPQ len, pos
	JNE  loop256b
	VZEROUPPER                   // clear upper vector-register state before returning
	RET

loop_1b:
	MOVQ csrc, csrc_tmp
	MOVQ $0, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	SUBQ $2, csrc_tmp
	MOVB -1(src_tmp)(len*1), src_val0 // encode from the end of src

next_vect_1b:
	ADDQ $24, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	MOVB -1(src_tmp)(len*1), src_val1
	XORB src_val1, src_val0
	SUBQ $1, csrc_tmp
	JGE  next_vect_1b

	MOVB  src_val0, -1(dst)(len*1)
	SUBQ  $1, len                // one tail byte done
	TESTQ $7, len
	JNZ   loop_1b                // keep going until len is a multiple of 8

	CMPQ  len, $0
	JE    ret                    // the whole vector was shorter than 8 bytes
	TESTQ $255, len
	JZ    aligned                // remainder is a multiple of 256: go wide

not_aligned:
	TESTQ $7, len
	JNE   loop_1b                // not yet a multiple of 8: peel single bytes
	MOVQ  len, not_aligned_len
	ANDQ  $255, not_aligned_len  // bytes to peel off in 8-byte words

loop_8b:
	MOVQ csrc, csrc_tmp
	MOVQ $0, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	SUBQ $2, csrc_tmp
	MOVQ -8(src_tmp)(len*1), src_val0 // last 8 unprocessed bytes of the first vector

next_vect_8b:
	ADDQ $24, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	MOVQ -8(src_tmp)(len*1), src_val1
	XORQ src_val1, src_val0
	SUBQ $1, csrc_tmp
	JGE  next_vect_8b

	MOVQ src_val0, -8(dst)(len*1)
	SUBQ $8, len
	SUBQ $8, not_aligned_len
	JG   loop_8b

	CMPQ len, $256
	JGE  aligned                 // a non-empty 256-byte-multiple prefix remains
	RET                          // no vector instruction was executed on this path

ret:
	RET
|
72
vendor/github.com/templexxx/xorsimd/xorbytes_amd64.s
generated
vendored
Normal file
72
vendor/github.com/templexxx/xorsimd/xorbytes_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// func bytesN(dst, a, b *byte, n int)
//
// bytesN stores a[i] ^ b[i] into dst[i] for 0 <= i < n.
//
// Processing order: if n is not a multiple of 16 the tail is handled first,
// working backwards - single bytes until the remaining length is a multiple
// of 8, then one 8-byte word if needed - and the remaining multiple-of-16
// prefix is XORed forwards 16 bytes at a time with SSE2.
//
// Arguments (32 bytes, no results): dst+0(FP), a+8(FP), b+16(FP), n+24(FP).
// The three pointers must each address at least n bytes; that is not checked
// here. n == 0 returns immediately.
TEXT ·bytesN(SB), NOSPLIT, $0-32
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVQ  n+24(FP), DX
	TESTQ DX, DX
	JZ    ret         // n == 0: the 16-byte loop below would never reach AX == DX
	TESTQ $15, DX     // AND 15 & len, if not zero jump to not_aligned.
	JNZ   not_aligned

aligned:
	MOVQ $0, AX // position in slices

loop16b:
	MOVOU (SI)(AX*1), X0 // XOR 16byte forwards.
	MOVOU (CX)(AX*1), X1
	PXOR  X1, X0
	MOVOU X0, (BX)(AX*1)
	ADDQ  $16, AX
	CMPQ  DX, AX
	JNE   loop16b
	RET

loop_1b:
	SUBQ  $1, DX         // XOR 1byte backwards.
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	XORB  AX, DI
	MOVB  DI, (BX)(DX*1)
	TESTQ $7, DX         // AND 7 & len, if not zero jump to loop_1b.
	JNZ   loop_1b
	CMPQ  DX, $0         // if len is 0, ret.
	JE    ret
	TESTQ $15, DX        // AND 15 & len, if zero jump to aligned.
	JZ    aligned

not_aligned:
	TESTQ $7, DX         // AND $7 & len, if not zero jump to loop_1b.
	JNE   loop_1b
	SUBQ  $8, DX         // XOR 8bytes backwards.
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)(DX*1)
	CMPQ  DX, $16        // if len is greater or equal 16 here, it must be aligned.
	JGE   aligned

ret:
	RET
|
||||
|
||||
// func bytes8(dst, a, b *byte)
//
// bytes8 XORs the 8 bytes at a with the 8 bytes at b and stores the 8-byte
// result at dst, using one 64-bit load per operand and one 64-bit store.
//
// Arguments (24 bytes, no results): dst+0(FP), a+8(FP), b+16(FP).
// Each pointer must address at least 8 bytes; that is not checked here.
TEXT ·bytes8(SB), NOSPLIT, $0-24
	MOVQ dst+0(FP), BX
	MOVQ a+8(FP), SI
	MOVQ b+16(FP), CX
	MOVQ (SI), DI      // DI = a[0:8]
	MOVQ (CX), AX      // AX = b[0:8]
	XORQ AX, DI
	MOVQ DI, (BX)      // dst[0:8] = a[0:8] ^ b[0:8]
	RET
|
||||
|
||||
// func bytes16(dst, a, b *byte)
//
// bytes16 XORs the 16 bytes at a with the 16 bytes at b and stores the
// 16-byte result at dst, using unaligned SSE2 loads and stores.
//
// Arguments (24 bytes, no results): dst+0(FP), a+8(FP), b+16(FP).
// Each pointer must address at least 16 bytes; that is not checked here.
TEXT ·bytes16(SB), NOSPLIT, $0-24
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVOU (SI), X0      // X0 = a[0:16]
	MOVOU (CX), X1      // X1 = b[0:16]
	PXOR  X1, X0
	MOVOU X0, (BX)      // dst[0:16] = a[0:16] ^ b[0:16]
	RET
|
123
vendor/github.com/templexxx/xorsimd/xorsse2_amd64.s
generated
vendored
Normal file
123
vendor/github.com/templexxx/xorsimd/xorsse2_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
// Copyright (c) 2019. Temple3x (temple3x@gmail.com)
|
||||
//
|
||||
// Use of this source code is governed by the MIT License
|
||||
// that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define dst BX // destination (parity) buffer address
|
||||
#define d2src SI // address of the two-dimensional src slice (array of slice headers)
|
||||
#define csrc CX // number of source vectors
|
||||
#define len DX // length of each vector in bytes (decremented while the tail is trimmed)
|
||||
#define pos R8 // current byte position inside the vectors in the wide loop
|
||||
|
||||
#define csrc_tmp R9 // per-position countdown of source vectors still to fold in (initialised from csrc)
|
||||
#define d2src_off R10 // byte offset into the slice-header array; advanced by 24 per source vector
|
||||
#define src_tmp R11 // base address of the source vector currently being read
|
||||
#define not_aligned_len R12 // tail bytes left for the 8-byte loop (len & 63)
|
||||
#define src_val0 R13 // XOR accumulator for the 1-byte and 8-byte scalar paths
|
||||
#define src_val1 R14 // value loaded from the next source vector in the scalar paths
|
||||
|
||||
// func encodeSSE2(dst []byte, src [][]byte)
//
// encodeSSE2 writes into dst the byte-wise XOR of every vector in src:
//
//	dst[i] = src[0][i] ^ src[1][i] ^ ... ^ src[len(src)-1][i]   for 0 <= i < len(dst)
//
// Processing order:
//   1. If len(dst) is not a multiple of 8, single bytes are folded from the
//      end of the vectors until the remaining length is a multiple of 8.
//   2. 8-byte words are then folded from the end until the remaining length
//      is a multiple of 64.
//   3. The remaining prefix is folded front-to-back, 64 bytes per iteration,
//      in four XMM accumulators (X0-X3).
//
// Arguments (48 bytes: two 24-byte slice headers, no results):
//   dst_base+0(FP), dst_len+8(FP)   - destination slice
//   src_base+24(FP), src_len+32(FP) - slice of source slices
//
// Preconditions that are NOT checked here: len(src) >= 2 (every fold loop
// unconditionally reads a second slice header) and every src[j] holds at
// least len(dst) bytes. Presumably the Go caller guarantees both - verify.
//
// The FP symbol names use the _base/_len suffixes because dst and len are
// preprocessor macros in this file and would otherwise be expanded.
TEXT ·encodeSSE2(SB), NOSPLIT, $0-48
	MOVQ  dst_base+0(FP), dst
	MOVQ  src_base+24(FP), d2src
	MOVQ  src_len+32(FP), csrc
	MOVQ  dst_len+8(FP), len
	TESTQ len, len
	JZ    ret                    // empty dst: the 64-byte loop below would never reach pos == len
	TESTQ $63, len
	JNZ   not_aligned            // len is not a multiple of 64: trim the tail first

aligned:
	MOVQ $0, pos

loop64b:
	MOVQ  csrc, csrc_tmp
	SUBQ  $2, csrc_tmp                  // first vector is loaded below; JGE after SUBQ $1 yields csrc-1 folds
	MOVQ  $0, d2src_off
	MOVQ  (d2src)(d2src_off*1), src_tmp // address of the first source vector
	MOVOU (src_tmp)(pos*1), X0
	MOVOU 16(src_tmp)(pos*1), X1
	MOVOU 32(src_tmp)(pos*1), X2
	MOVOU 48(src_tmp)(pos*1), X3

next_vect:
	ADDQ  $24, d2src_off                // step to the next 24-byte slice header
	MOVQ  (d2src)(d2src_off*1), src_tmp
	MOVOU (src_tmp)(pos*1), X4
	MOVOU 16(src_tmp)(pos*1), X5
	MOVOU 32(src_tmp)(pos*1), X6
	MOVOU 48(src_tmp)(pos*1), X7
	PXOR  X4, X0
	PXOR  X5, X1
	PXOR  X6, X2
	PXOR  X7, X3
	SUBQ  $1, csrc_tmp
	JGE   next_vect

	MOVOU X0, (dst)(pos*1)
	MOVOU X1, 16(dst)(pos*1)
	MOVOU X2, 32(dst)(pos*1)
	MOVOU X3, 48(dst)(pos*1)

	ADDQ $64, pos
	CMPQ len, pos
	JNE  loop64b
	RET

loop_1b:
	MOVQ csrc, csrc_tmp
	MOVQ $0, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	SUBQ $2, csrc_tmp
	MOVB -1(src_tmp)(len*1), src_val0 // last unprocessed byte of the first vector

next_vect_1b:
	ADDQ $24, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	MOVB -1(src_tmp)(len*1), src_val1
	XORB src_val1, src_val0
	SUBQ $1, csrc_tmp
	JGE  next_vect_1b

	MOVB  src_val0, -1(dst)(len*1)
	SUBQ  $1, len                // one tail byte done
	TESTQ $7, len
	JNZ   loop_1b                // keep going until len is a multiple of 8

	CMPQ  len, $0
	JE    ret                    // the whole vector was shorter than 8 bytes
	TESTQ $63, len
	JZ    aligned                // remainder is a multiple of 64: go wide

not_aligned:
	TESTQ $7, len
	JNE   loop_1b                // not yet a multiple of 8: peel single bytes
	MOVQ  len, not_aligned_len
	ANDQ  $63, not_aligned_len   // bytes to peel off in 8-byte words

loop_8b:
	MOVQ csrc, csrc_tmp
	MOVQ $0, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	SUBQ $2, csrc_tmp
	MOVQ -8(src_tmp)(len*1), src_val0 // last 8 unprocessed bytes of the first vector

next_vect_8b:
	ADDQ $24, d2src_off
	MOVQ (d2src)(d2src_off*1), src_tmp
	MOVQ -8(src_tmp)(len*1), src_val1
	XORQ src_val1, src_val0
	SUBQ $1, csrc_tmp
	JGE  next_vect_8b

	MOVQ src_val0, -8(dst)(len*1)
	SUBQ $8, len
	SUBQ $8, not_aligned_len
	JG   loop_8b

	CMPQ len, $64
	JGE  aligned                 // a non-empty 64-byte-multiple prefix remains
	RET

ret:
	RET
|
Reference in New Issue
Block a user