mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: add AES instructions
AVXAES is a composite feature set: Intel listed it as "AVXAES" in the XED data instead of separating it into its component features. The tests will be in the next CL. Change-Id: I89c97261f2228b2fdafb48f63e82ef6239bdd5ca Reviewed-on: https://go-review.googlesource.com/c/go/+/706055 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
1c961c2fb2
commit
703a5fbaad
15 changed files with 497 additions and 4 deletions
|
|
@ -12,7 +12,8 @@ import (
|
||||||
func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
var p *obj.Prog
|
var p *obj.Prog
|
||||||
switch v.Op {
|
switch v.Op {
|
||||||
case ssa.OpAMD64VPABSB128,
|
case ssa.OpAMD64VAESIMC128,
|
||||||
|
ssa.OpAMD64VPABSB128,
|
||||||
ssa.OpAMD64VPABSB256,
|
ssa.OpAMD64VPABSB256,
|
||||||
ssa.OpAMD64VPABSB512,
|
ssa.OpAMD64VPABSB512,
|
||||||
ssa.OpAMD64VPABSW128,
|
ssa.OpAMD64VPABSW128,
|
||||||
|
|
@ -148,7 +149,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VSQRTPD512:
|
ssa.OpAMD64VSQRTPD512:
|
||||||
p = simdV11(s, v)
|
p = simdV11(s, v)
|
||||||
|
|
||||||
case ssa.OpAMD64VADDPS128,
|
case ssa.OpAMD64VAESDECLAST128,
|
||||||
|
ssa.OpAMD64VAESDECLAST256,
|
||||||
|
ssa.OpAMD64VAESDEC128,
|
||||||
|
ssa.OpAMD64VAESDEC256,
|
||||||
|
ssa.OpAMD64VAESENCLAST128,
|
||||||
|
ssa.OpAMD64VAESENCLAST256,
|
||||||
|
ssa.OpAMD64VAESENC128,
|
||||||
|
ssa.OpAMD64VAESENC256,
|
||||||
|
ssa.OpAMD64VADDPS128,
|
||||||
ssa.OpAMD64VADDPS256,
|
ssa.OpAMD64VADDPS256,
|
||||||
ssa.OpAMD64VADDPS512,
|
ssa.OpAMD64VADDPS512,
|
||||||
ssa.OpAMD64VADDPD128,
|
ssa.OpAMD64VADDPD128,
|
||||||
|
|
@ -917,7 +926,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPBLENDVB256:
|
ssa.OpAMD64VPBLENDVB256:
|
||||||
p = simdV31(s, v)
|
p = simdV31(s, v)
|
||||||
|
|
||||||
case ssa.OpAMD64VROUNDPS128,
|
case ssa.OpAMD64VAESKEYGENASSIST128,
|
||||||
|
ssa.OpAMD64VROUNDPS128,
|
||||||
ssa.OpAMD64VROUNDPS256,
|
ssa.OpAMD64VROUNDPS256,
|
||||||
ssa.OpAMD64VROUNDPD128,
|
ssa.OpAMD64VROUNDPD128,
|
||||||
ssa.OpAMD64VROUNDPD256,
|
ssa.OpAMD64VROUNDPD256,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,15 @@
|
||||||
// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
|
// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
|
||||||
|
|
||||||
|
(AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...)
|
||||||
|
(AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...)
|
||||||
|
(AESDecryptRoundUint8x16 ...) => (VAESDEC128 ...)
|
||||||
|
(AESDecryptRoundUint8x32 ...) => (VAESDEC256 ...)
|
||||||
|
(AESEncryptLastRoundUint8x16 ...) => (VAESENCLAST128 ...)
|
||||||
|
(AESEncryptLastRoundUint8x32 ...) => (VAESENCLAST256 ...)
|
||||||
|
(AESEncryptRoundUint8x16 ...) => (VAESENC128 ...)
|
||||||
|
(AESEncryptRoundUint8x32 ...) => (VAESENC256 ...)
|
||||||
|
(AESInvMixColumnsUint32x4 ...) => (VAESIMC128 ...)
|
||||||
|
(AESRoundKeyGenAssistUint32x4 ...) => (VAESKEYGENASSIST128 ...)
|
||||||
(AbsInt8x16 ...) => (VPABSB128 ...)
|
(AbsInt8x16 ...) => (VPABSB128 ...)
|
||||||
(AbsInt8x32 ...) => (VPABSB256 ...)
|
(AbsInt8x32 ...) => (VPABSB256 ...)
|
||||||
(AbsInt8x64 ...) => (VPABSB512 ...)
|
(AbsInt8x64 ...) => (VPABSB512 ...)
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
|
@ -1084,6 +1093,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VSUBPSMasked128", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VSUBPSMasked128", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VSUBPSMasked256", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VSUBPSMasked256", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VSUBPSMasked512", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VSUBPSMasked512", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VAESKEYGENASSIST128", argLength: 1, reg: v11, asm: "VAESKEYGENASSIST", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VROUNDPS128", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VROUNDPS128", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VROUNDPS256", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VROUNDPS256", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VROUNDPD128", argLength: 1, reg: v11, asm: "VROUNDPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VROUNDPD128", argLength: 1, reg: v11, asm: "VROUNDPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,15 @@ package main
|
||||||
|
|
||||||
func simdGenericOps() []opData {
|
func simdGenericOps() []opData {
|
||||||
return []opData{
|
return []opData{
|
||||||
|
{name: "AESDecryptLastRoundUint8x16", argLength: 2, commutative: false},
|
||||||
|
{name: "AESDecryptLastRoundUint8x32", argLength: 2, commutative: false},
|
||||||
|
{name: "AESDecryptRoundUint8x16", argLength: 2, commutative: false},
|
||||||
|
{name: "AESDecryptRoundUint8x32", argLength: 2, commutative: false},
|
||||||
|
{name: "AESEncryptLastRoundUint8x16", argLength: 2, commutative: false},
|
||||||
|
{name: "AESEncryptLastRoundUint8x32", argLength: 2, commutative: false},
|
||||||
|
{name: "AESEncryptRoundUint8x16", argLength: 2, commutative: false},
|
||||||
|
{name: "AESEncryptRoundUint8x32", argLength: 2, commutative: false},
|
||||||
|
{name: "AESInvMixColumnsUint32x4", argLength: 1, commutative: false},
|
||||||
{name: "AbsInt8x16", argLength: 1, commutative: false},
|
{name: "AbsInt8x16", argLength: 1, commutative: false},
|
||||||
{name: "AbsInt8x32", argLength: 1, commutative: false},
|
{name: "AbsInt8x32", argLength: 1, commutative: false},
|
||||||
{name: "AbsInt8x64", argLength: 1, commutative: false},
|
{name: "AbsInt8x64", argLength: 1, commutative: false},
|
||||||
|
|
@ -1101,6 +1110,7 @@ func simdGenericOps() []opData {
|
||||||
{name: "moveMaskedUint16x32", argLength: 2, commutative: false},
|
{name: "moveMaskedUint16x32", argLength: 2, commutative: false},
|
||||||
{name: "moveMaskedUint32x16", argLength: 2, commutative: false},
|
{name: "moveMaskedUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "moveMaskedUint64x8", argLength: 2, commutative: false},
|
{name: "moveMaskedUint64x8", argLength: 2, commutative: false},
|
||||||
|
{name: "AESRoundKeyGenAssistUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "CeilScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "CeilScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "CeilScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "CeilScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "CeilScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "CeilScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
|
|
||||||
|
|
@ -1253,6 +1253,15 @@ const (
|
||||||
OpAMD64VADDSUBPD256
|
OpAMD64VADDSUBPD256
|
||||||
OpAMD64VADDSUBPS128
|
OpAMD64VADDSUBPS128
|
||||||
OpAMD64VADDSUBPS256
|
OpAMD64VADDSUBPS256
|
||||||
|
OpAMD64VAESDEC128
|
||||||
|
OpAMD64VAESDEC256
|
||||||
|
OpAMD64VAESDECLAST128
|
||||||
|
OpAMD64VAESDECLAST256
|
||||||
|
OpAMD64VAESENC128
|
||||||
|
OpAMD64VAESENC256
|
||||||
|
OpAMD64VAESENCLAST128
|
||||||
|
OpAMD64VAESENCLAST256
|
||||||
|
OpAMD64VAESIMC128
|
||||||
OpAMD64VBROADCASTSD256
|
OpAMD64VBROADCASTSD256
|
||||||
OpAMD64VBROADCASTSD512
|
OpAMD64VBROADCASTSD512
|
||||||
OpAMD64VBROADCASTSDMasked256
|
OpAMD64VBROADCASTSDMasked256
|
||||||
|
|
@ -2316,6 +2325,7 @@ const (
|
||||||
OpAMD64VSUBPSMasked128
|
OpAMD64VSUBPSMasked128
|
||||||
OpAMD64VSUBPSMasked256
|
OpAMD64VSUBPSMasked256
|
||||||
OpAMD64VSUBPSMasked512
|
OpAMD64VSUBPSMasked512
|
||||||
|
OpAMD64VAESKEYGENASSIST128
|
||||||
OpAMD64VROUNDPS128
|
OpAMD64VROUNDPS128
|
||||||
OpAMD64VROUNDPS256
|
OpAMD64VROUNDPS256
|
||||||
OpAMD64VROUNDPD128
|
OpAMD64VROUNDPD128
|
||||||
|
|
@ -5401,6 +5411,15 @@ const (
|
||||||
OpCvtMask64x4to8
|
OpCvtMask64x4to8
|
||||||
OpCvtMask64x8to8
|
OpCvtMask64x8to8
|
||||||
OpIsZeroVec
|
OpIsZeroVec
|
||||||
|
OpAESDecryptLastRoundUint8x16
|
||||||
|
OpAESDecryptLastRoundUint8x32
|
||||||
|
OpAESDecryptRoundUint8x16
|
||||||
|
OpAESDecryptRoundUint8x32
|
||||||
|
OpAESEncryptLastRoundUint8x16
|
||||||
|
OpAESEncryptLastRoundUint8x32
|
||||||
|
OpAESEncryptRoundUint8x16
|
||||||
|
OpAESEncryptRoundUint8x32
|
||||||
|
OpAESInvMixColumnsUint32x4
|
||||||
OpAbsInt8x16
|
OpAbsInt8x16
|
||||||
OpAbsInt8x32
|
OpAbsInt8x32
|
||||||
OpAbsInt8x64
|
OpAbsInt8x64
|
||||||
|
|
@ -6498,6 +6517,7 @@ const (
|
||||||
OpmoveMaskedUint16x32
|
OpmoveMaskedUint16x32
|
||||||
OpmoveMaskedUint32x16
|
OpmoveMaskedUint32x16
|
||||||
OpmoveMaskedUint64x8
|
OpmoveMaskedUint64x8
|
||||||
|
OpAESRoundKeyGenAssistUint32x4
|
||||||
OpCeilScaledFloat32x4
|
OpCeilScaledFloat32x4
|
||||||
OpCeilScaledFloat32x8
|
OpCeilScaledFloat32x8
|
||||||
OpCeilScaledFloat32x16
|
OpCeilScaledFloat32x16
|
||||||
|
|
@ -20088,6 +20108,131 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VAESDEC128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESDEC,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESDEC256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESDEC,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESDECLAST128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESDECLAST,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESDECLAST256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESDECLAST,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESENC128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESENC,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESENC256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESENC,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESENCLAST128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESENCLAST,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESENCLAST256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVAESENCLAST,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VAESIMC128",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVAESIMC,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VBROADCASTSD256",
|
name: "VBROADCASTSD256",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -35714,6 +35859,20 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VAESKEYGENASSIST128",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVAESKEYGENASSIST,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VROUNDPS128",
|
name: "VROUNDPS128",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
@ -76061,6 +76220,51 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "AESDecryptLastRoundUint8x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESDecryptLastRoundUint8x32",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESDecryptRoundUint8x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESDecryptRoundUint8x32",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESEncryptLastRoundUint8x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESEncryptLastRoundUint8x32",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESEncryptRoundUint8x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESEncryptRoundUint8x32",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AESInvMixColumnsUint32x4",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "AbsInt8x16",
|
name: "AbsInt8x16",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -81810,6 +82014,12 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "AESRoundKeyGenAssistUint32x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "CeilScaledFloat32x4",
|
name: "CeilScaledFloat32x4",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,36 @@ import "cmd/compile/internal/types"
|
||||||
|
|
||||||
func rewriteValueAMD64(v *Value) bool {
|
func rewriteValueAMD64(v *Value) bool {
|
||||||
switch v.Op {
|
switch v.Op {
|
||||||
|
case OpAESDecryptLastRoundUint8x16:
|
||||||
|
v.Op = OpAMD64VAESDECLAST128
|
||||||
|
return true
|
||||||
|
case OpAESDecryptLastRoundUint8x32:
|
||||||
|
v.Op = OpAMD64VAESDECLAST256
|
||||||
|
return true
|
||||||
|
case OpAESDecryptRoundUint8x16:
|
||||||
|
v.Op = OpAMD64VAESDEC128
|
||||||
|
return true
|
||||||
|
case OpAESDecryptRoundUint8x32:
|
||||||
|
v.Op = OpAMD64VAESDEC256
|
||||||
|
return true
|
||||||
|
case OpAESEncryptLastRoundUint8x16:
|
||||||
|
v.Op = OpAMD64VAESENCLAST128
|
||||||
|
return true
|
||||||
|
case OpAESEncryptLastRoundUint8x32:
|
||||||
|
v.Op = OpAMD64VAESENCLAST256
|
||||||
|
return true
|
||||||
|
case OpAESEncryptRoundUint8x16:
|
||||||
|
v.Op = OpAMD64VAESENC128
|
||||||
|
return true
|
||||||
|
case OpAESEncryptRoundUint8x32:
|
||||||
|
v.Op = OpAMD64VAESENC256
|
||||||
|
return true
|
||||||
|
case OpAESInvMixColumnsUint32x4:
|
||||||
|
v.Op = OpAMD64VAESIMC128
|
||||||
|
return true
|
||||||
|
case OpAESRoundKeyGenAssistUint32x4:
|
||||||
|
v.Op = OpAMD64VAESKEYGENASSIST128
|
||||||
|
return true
|
||||||
case OpAMD64ADCQ:
|
case OpAMD64ADCQ:
|
||||||
return rewriteValueAMD64_OpAMD64ADCQ(v)
|
return rewriteValueAMD64_OpAMD64ADCQ(v)
|
||||||
case OpAMD64ADCQconst:
|
case OpAMD64ADCQconst:
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,16 @@ import (
|
||||||
const simdPackage = "simd"
|
const simdPackage = "simd"
|
||||||
|
|
||||||
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
|
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
|
||||||
|
addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x32.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x16.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x32.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x16.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x32.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x16.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x32.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x4.AESInvMixColumns", opLen1(ssa.OpAESInvMixColumnsUint32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x4.AESRoundKeyGenAssist", opLen1Imm8(ssa.OpAESRoundKeyGenAssistUint32x4, types.TypeVec128, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x32.Abs", opLen1(ssa.OpAbsInt8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.Abs", opLen1(ssa.OpAbsInt8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x64.Abs", opLen1(ssa.OpAbsInt8x64, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int8x64.Abs", opLen1(ssa.OpAbsInt8x64, types.TypeVec512), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ var X86 struct {
|
||||||
HasAVX512DQ bool
|
HasAVX512DQ bool
|
||||||
HasAVX512VL bool
|
HasAVX512VL bool
|
||||||
HasAVX512GFNI bool
|
HasAVX512GFNI bool
|
||||||
|
HasAVX512VAES bool
|
||||||
HasAVX512VNNI bool
|
HasAVX512VNNI bool
|
||||||
HasAVX512VBMI bool
|
HasAVX512VBMI bool
|
||||||
HasAVX512VBMI2 bool
|
HasAVX512VBMI2 bool
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ const (
|
||||||
cpuid_AVX512VBMI2 = 1 << 6
|
cpuid_AVX512VBMI2 = 1 << 6
|
||||||
cpuid_SSSE3 = 1 << 9
|
cpuid_SSSE3 = 1 << 9
|
||||||
cpuid_AVX512GFNI = 1 << 8
|
cpuid_AVX512GFNI = 1 << 8
|
||||||
|
cpuid_AVX512VAES = 1 << 9
|
||||||
cpuid_AVX512VNNI = 1 << 11
|
cpuid_AVX512VNNI = 1 << 11
|
||||||
cpuid_AVX512BITALG = 1 << 12
|
cpuid_AVX512BITALG = 1 << 12
|
||||||
cpuid_FMA = 1 << 12
|
cpuid_FMA = 1 << 12
|
||||||
|
|
@ -182,6 +183,7 @@ func doinit() {
|
||||||
X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
|
X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
|
||||||
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512VBMI)
|
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512VBMI)
|
||||||
X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
|
X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
|
||||||
|
X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES)
|
||||||
X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
|
X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
|
||||||
X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
|
X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
|
||||||
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
|
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
|
||||||
|
|
|
||||||
|
|
@ -563,7 +563,10 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
|
||||||
}
|
}
|
||||||
featureSet := make(map[featureKey]struct{})
|
featureSet := make(map[featureKey]struct{})
|
||||||
for _, op := range ops {
|
for _, op := range ops {
|
||||||
featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{}
|
if !strings.Contains(op.CPUFeature, ",") {
|
||||||
|
featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{}
|
||||||
|
}
|
||||||
|
// Don't generate feature checks for composite features.
|
||||||
}
|
}
|
||||||
features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
|
features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
|
||||||
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
|
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
|
||||||
|
|
|
||||||
|
|
@ -3,3 +3,47 @@
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME counts the leading zeros of each element in x.
|
// NAME counts the leading zeros of each element in x.
|
||||||
|
- go: AESEncryptRound
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of w array in use.
|
||||||
|
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
|
||||||
|
- go: AESEncryptLastRound
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of w array in use.
|
||||||
|
// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
|
||||||
|
- go: AESRoundKeyGenAssist
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is an array of AES words, but only x[1] and x[3] are used.
|
||||||
|
// r is a value from the Rcon constant array.
|
||||||
|
// result[0] = SubWord(x[1])
|
||||||
|
// result[1] = XOR(SubWord(RotWord(x[1])), r)
|
||||||
|
// result[2] = SubWord(x[3])
|
||||||
|
// result[3] = XOR(SubWord(RotWord(x[3])), r)
|
||||||
|
- go: AESDecryptRound
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of dw array in use.
|
||||||
|
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
|
||||||
|
- go: AESDecryptLastRound
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of dw array in use.
|
||||||
|
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
|
||||||
|
- go: AESInvMixColumns
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the chunk of w array in use.
|
||||||
|
// result = InvMixColumns(x)
|
||||||
|
|
@ -6,3 +6,50 @@
|
||||||
go: $t
|
go: $t
|
||||||
out:
|
out:
|
||||||
- *any
|
- *any
|
||||||
|
- go: AESEncryptRound
|
||||||
|
asm: VAESENC
|
||||||
|
in:
|
||||||
|
- &uint8s
|
||||||
|
base: uint
|
||||||
|
overwriteElementBits: 8
|
||||||
|
- &uint32s
|
||||||
|
base: uint
|
||||||
|
overwriteElementBits: 32
|
||||||
|
out:
|
||||||
|
- *uint8s
|
||||||
|
- go: AESEncryptLastRound
|
||||||
|
asm: VAESENCLAST
|
||||||
|
in:
|
||||||
|
- *uint8s
|
||||||
|
- *uint32s
|
||||||
|
out:
|
||||||
|
- *uint8s
|
||||||
|
- go: AESRoundKeyGenAssist
|
||||||
|
asm: VAESKEYGENASSIST
|
||||||
|
in:
|
||||||
|
- *uint32s
|
||||||
|
- class: immediate
|
||||||
|
immOffset: 0
|
||||||
|
name: rconVal
|
||||||
|
out:
|
||||||
|
- *uint32s
|
||||||
|
- go: AESDecryptRound
|
||||||
|
asm: VAESDEC
|
||||||
|
in:
|
||||||
|
- *uint8s
|
||||||
|
- *uint32s
|
||||||
|
out:
|
||||||
|
- *uint8s
|
||||||
|
- go: AESDecryptLastRound
|
||||||
|
asm: VAESDECLAST
|
||||||
|
in:
|
||||||
|
- *uint8s
|
||||||
|
- *uint32s
|
||||||
|
out:
|
||||||
|
- *uint8s
|
||||||
|
- go: AESInvMixColumns
|
||||||
|
asm: VAESIMC
|
||||||
|
in:
|
||||||
|
- *uint32s
|
||||||
|
out:
|
||||||
|
- *uint32s
|
||||||
|
|
@ -770,6 +770,7 @@ var cpuFeatureMap = map[cpuFeatureKey]string{
|
||||||
{"AVX", ""}: "AVX",
|
{"AVX", ""}: "AVX",
|
||||||
{"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
|
{"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
|
||||||
{"AVX2", ""}: "AVX2",
|
{"AVX2", ""}: "AVX2",
|
||||||
|
{"AVXAES", ""}: "AVX, AES",
|
||||||
|
|
||||||
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
|
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
|
||||||
{"AVX512EVEX", "AVX512F"}: "AVX512",
|
{"AVX512EVEX", "AVX512F"}: "AVX512",
|
||||||
|
|
@ -786,6 +787,7 @@ var cpuFeatureMap = map[cpuFeatureKey]string{
|
||||||
{"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
|
{"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
|
||||||
{"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
|
{"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
|
||||||
{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
|
{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
|
||||||
|
{"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
|
||||||
|
|
||||||
// AVX 10.2 (not yet supported)
|
// AVX 10.2 (not yet supported)
|
||||||
{"AVX512EVEX", "AVX10_2_RC"}: "ignore",
|
{"AVX512EVEX", "AVX10_2_RC"}: "ignore",
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,14 @@ func HasAVX512GFNI() bool {
|
||||||
return cpu.X86.HasAVX512GFNI
|
return cpu.X86.HasAVX512GFNI
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasAVX512VAES returns whether the CPU supports the AVX512VAES feature.
|
||||||
|
//
|
||||||
|
// HasAVX512VAES is defined on all GOARCHes, but will only return true on
|
||||||
|
// GOARCH amd64.
|
||||||
|
func HasAVX512VAES() bool {
|
||||||
|
return cpu.X86.HasAVX512VAES
|
||||||
|
}
|
||||||
|
|
||||||
// HasAVX512VBMI returns whether the CPU supports the AVX512VBMI feature.
|
// HasAVX512VBMI returns whether the CPU supports the AVX512VBMI feature.
|
||||||
//
|
//
|
||||||
// HasAVX512VBMI is defined on all GOARCHes, but will only return true on
|
// HasAVX512VBMI is defined on all GOARCHes, but will only return true on
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,102 @@
|
||||||
|
|
||||||
package simd
|
package simd
|
||||||
|
|
||||||
|
/* AESDecryptLastRound */
|
||||||
|
|
||||||
|
// AESDecryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of dw array in use.
|
||||||
|
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESDECLAST, CPU Feature: AVX, AES
|
||||||
|
func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
|
||||||
|
|
||||||
|
// AESDecryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of dw array in use.
|
||||||
|
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESDECLAST, CPU Feature: AVX512VAES
|
||||||
|
func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32
|
||||||
|
|
||||||
|
/* AESDecryptRound */
|
||||||
|
|
||||||
|
// AESDecryptRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of dw array in use.
|
||||||
|
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESDEC, CPU Feature: AVX, AES
|
||||||
|
func (x Uint8x16) AESDecryptRound(y Uint32x4) Uint8x16
|
||||||
|
|
||||||
|
// AESDecryptRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of dw array in use.
|
||||||
|
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESDEC, CPU Feature: AVX512VAES
|
||||||
|
func (x Uint8x32) AESDecryptRound(y Uint32x8) Uint8x32
|
||||||
|
|
||||||
|
/* AESEncryptLastRound */
|
||||||
|
|
||||||
|
// AESEncryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of w array in use.
|
||||||
|
// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESENCLAST, CPU Feature: AVX, AES
|
||||||
|
func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
|
||||||
|
|
||||||
|
// AESEncryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of w array in use.
|
||||||
|
// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESENCLAST, CPU Feature: AVX512VAES
|
||||||
|
func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32
|
||||||
|
|
||||||
|
/* AESEncryptRound */
|
||||||
|
|
||||||
|
// AESEncryptRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of w array in use.
|
||||||
|
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESENC, CPU Feature: AVX, AES
|
||||||
|
func (x Uint8x16) AESEncryptRound(y Uint32x4) Uint8x16
|
||||||
|
|
||||||
|
// AESEncryptRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
|
||||||
|
// y is the chunk of w array in use.
|
||||||
|
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
|
||||||
|
//
|
||||||
|
// Asm: VAESENC, CPU Feature: AVX512VAES
|
||||||
|
func (x Uint8x32) AESEncryptRound(y Uint32x8) Uint8x32
|
||||||
|
|
||||||
|
/* AESInvMixColumns */
|
||||||
|
|
||||||
|
// AESInvMixColumns performs the InvMixColumns operation in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is the chunk of w array in use.
|
||||||
|
// result = InvMixColumns(x)
|
||||||
|
//
|
||||||
|
// Asm: VAESIMC, CPU Feature: AVX, AES
|
||||||
|
func (x Uint32x4) AESInvMixColumns() Uint32x4
|
||||||
|
|
||||||
|
/* AESRoundKeyGenAssist */
|
||||||
|
|
||||||
|
// AESRoundKeyGenAssist performs some components of KeyExpansion in the AES cipher algorithm defined in FIPS 197.
|
||||||
|
// x is an array of AES words, but only x[1] and x[3] are used.
|
||||||
|
// rconVal is a value from the Rcon constant array.
|
||||||
|
// result[0] = SubWord(x[1])
|
||||||
|
// result[1] = XOR(RotWord(SubWord(x[1])), rconVal)
|
||||||
|
// result[2] = SubWord(x[3])
|
||||||
|
// result[3] = XOR(RotWord(SubWord(x[3])), rconVal)
|
||||||
|
//
|
||||||
|
// rconVal results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
|
||||||
|
func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
|
||||||
|
|
||||||
/* Abs */
|
/* Abs */
|
||||||
|
|
||||||
// Abs computes the absolute value of each element.
|
// Abs computes the absolute value of each element.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue