Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support SHA* intrinsics on Intel CPU #37

Merged
merged 2 commits into from
Jan 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.test
3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ os:

go:
- tip
- 1.11
- 1.10
- 1.11.x

env:
- ARCH=x86_64
Expand Down
146 changes: 86 additions & 60 deletions cpuid.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,78 +16,104 @@
package sha256

// True when SIMD instructions are available.
var avx512 = haveAVX512()
var avx2 = haveAVX2()
var avx = haveAVX()
var ssse3 = haveSSSE3()
var avx512 bool
var avx2 bool
var avx bool
var sse bool
var sse2 bool
var sse3 bool
var ssse3 bool
var sse41 bool
var sse42 bool
var popcnt bool
var sha bool
var armSha = haveArmSha()

// haveAVX returns true when there is AVX support
func haveAVX() bool {
_, _, c, _ := cpuid(1)
func init() {
var _xsave bool
var _osxsave bool
var _avx bool
var _avx2 bool
var _avx512f bool
var _avx512dq bool
// var _avx512pf bool
// var _avx512er bool
// var _avx512cd bool
var _avx512bw bool
var _avx512vl bool
var _sseState bool
var _avxState bool
var _opmaskState bool
var _zmmHI256State bool
var _hi16ZmmState bool

// Check XGETBV, OXSAVE and AVX bits
if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
// Check for OS support
eax, _ := xgetbv(0)
return (eax & 0x6) == 0x6
}
return false
}

// haveAVX2 returns true when there is AVX2 support
func haveAVX2() bool {
mfi, _, _, _ := cpuid(0)

// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
if mfi >= 7 && haveAVX() {
_, ebx, _, _ := cpuidex(7, 0)
return (ebx & 0x00000020) != 0
}
return false
}
if mfi >= 1 {
_, _, c, d := cpuid(1)

// haveAVX512 returns true when there is AVX512 support
func haveAVX512() bool {
mfi, _, _, _ := cpuid(0)
sse = (d & (1 << 25)) != 0
sse2 = (d & (1 << 26)) != 0
sse3 = (c & (1 << 0)) != 0
ssse3 = (c & (1 << 9)) != 0
sse41 = (c & (1 << 19)) != 0
sse42 = (c & (1 << 20)) != 0
popcnt = (c & (1 << 23)) != 0
_xsave = (c & (1 << 26)) != 0
_osxsave = (c & (1 << 27)) != 0
_avx = (c & (1 << 28)) != 0
}

// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
if mfi >= 7 {
_, _, c, _ := cpuid(1)
_, b, _, _ := cpuid(7)

// Only detect AVX-512 features if XGETBV is supported
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
// Check for OS support
eax, _ := xgetbv(0)
_, ebx, _, _ := cpuidex(7, 0)
_avx2 = (b & (1 << 5)) != 0
_avx512f = (b & (1 << 16)) != 0
_avx512dq = (b & (1 << 17)) != 0
// _avx512pf = (b & (1 << 26)) != 0
// _avx512er = (b & (1 << 27)) != 0
// _avx512cd = (b & (1 << 28)) != 0
_avx512bw = (b & (1 << 30)) != 0
_avx512vl = (b & (1 << 31)) != 0
sha = (b & (1 << 29)) != 0
}

// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
// ZMM16-ZMM31 state are enabled by OS)
/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
if ebx&(1<<16) == 0 {
return false // no AVX512F
}
if ebx&(1<<17) == 0 {
return false // no AVX512DQ
}
if ebx&(1<<30) == 0 {
return false // no AVX512BW
}
if ebx&(1<<31) == 0 {
return false // no AVX512VL
}
return true
}
}
// Stop here if XSAVE unsupported or not enabled
if !_xsave || !_osxsave {
return
}
return false
}

// haveSSSE3 returns true when there is SSSE3 support
func haveSSSE3() bool {
if _xsave && _osxsave {
a, _ := xgetbv(0)

_sseState = (a & (1 << 1)) != 0
_avxState = (a & (1 << 2)) != 0
_opmaskState = (a & (1 << 5)) != 0
_zmmHI256State = (a & (1 << 6)) != 0
_hi16ZmmState = (a & (1 << 7)) != 0
} else {
_sseState = true
}

_, _, c, _ := cpuid(1)
// Very unlikely that OS would enable XSAVE and then disable SSE
if !_sseState {
sse = false
sse2 = false
sse3 = false
ssse3 = false
sse41 = false
sse42 = false
}

return (c & 0x00000200) != 0
if _avxState {
avx = _avx
avx2 = _avx2
}

if _opmaskState && _zmmHI256State && _hi16ZmmState {
avx512 = (_avx512f &&
_avx512dq &&
_avx512bw &&
_avx512vl)
}
}
42 changes: 21 additions & 21 deletions cpuid_386.s
Original file line number Diff line number Diff line change
Expand Up @@ -24,30 +24,30 @@

// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid(SB), 7, $0
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET

// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET

// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·xgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET
43 changes: 21 additions & 22 deletions cpuid_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,30 @@

// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid(SB), 7, $0
XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET

XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET

// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET

// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·xgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET
Loading