diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index a4d24524357..de9cad8a478 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -12,7 +12,8 @@ import ( func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { var p *obj.Prog switch v.Op { - case ssa.OpAMD64VPABSB128, + case ssa.OpAMD64VAESIMC128, + ssa.OpAMD64VPABSB128, ssa.OpAMD64VPABSB256, ssa.OpAMD64VPABSB512, ssa.OpAMD64VPABSW128, @@ -148,7 +149,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VSQRTPD512: p = simdV11(s, v) - case ssa.OpAMD64VADDPS128, + case ssa.OpAMD64VAESDECLAST128, + ssa.OpAMD64VAESDECLAST256, + ssa.OpAMD64VAESDEC128, + ssa.OpAMD64VAESDEC256, + ssa.OpAMD64VAESENCLAST128, + ssa.OpAMD64VAESENCLAST256, + ssa.OpAMD64VAESENC128, + ssa.OpAMD64VAESENC256, + ssa.OpAMD64VADDPS128, ssa.OpAMD64VADDPS256, ssa.OpAMD64VADDPS512, ssa.OpAMD64VADDPD128, @@ -917,7 +926,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPBLENDVB256: p = simdV31(s, v) - case ssa.OpAMD64VROUNDPS128, + case ssa.OpAMD64VAESKEYGENASSIST128, + ssa.OpAMD64VROUNDPS128, ssa.OpAMD64VROUNDPS256, ssa.OpAMD64VROUNDPD128, ssa.OpAMD64VROUNDPD256, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 1eab8b5e6d6..d9229e958ad 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1,5 +1,15 @@ // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +(AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...) +(AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...) +(AESDecryptRoundUint8x16 ...) => (VAESDEC128 ...) +(AESDecryptRoundUint8x32 ...) => (VAESDEC256 ...) +(AESEncryptLastRoundUint8x16 ...) => (VAESENCLAST128 ...) 
+(AESEncryptLastRoundUint8x32 ...) => (VAESENCLAST256 ...) +(AESEncryptRoundUint8x16 ...) => (VAESENC128 ...) +(AESEncryptRoundUint8x32 ...) => (VAESENC256 ...) +(AESInvMixColumnsUint32x4 ...) => (VAESIMC128 ...) +(AESRoundKeyGenAssistUint32x4 ...) => (VAESKEYGENASSIST128 ...) (AbsInt8x16 ...) => (VPABSB128 ...) (AbsInt8x32 ...) => (VPABSB256 ...) (AbsInt8x64 ...) => (VPABSB512 ...) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 5e1da3249fe..680c576bb14 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -21,6 +21,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: 
"VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -1084,6 +1093,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VSUBPSMasked128", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VSUBPSMasked256", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VSUBPSMasked512", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VAESKEYGENASSIST128", argLength: 1, reg: v11, asm: "VAESKEYGENASSIST", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VROUNDPS128", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VROUNDPS256", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VROUNDPD128", argLength: 1, reg: v11, asm: "VROUNDPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index aa088dbf0bf..2e9f3ff1c49 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -4,6 +4,15 @@ package main func simdGenericOps() []opData { return 
[]opData{ + {name: "AESDecryptLastRoundUint8x16", argLength: 2, commutative: false}, + {name: "AESDecryptLastRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESDecryptRoundUint8x16", argLength: 2, commutative: false}, + {name: "AESDecryptRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESEncryptLastRoundUint8x16", argLength: 2, commutative: false}, + {name: "AESEncryptLastRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESEncryptRoundUint8x16", argLength: 2, commutative: false}, + {name: "AESEncryptRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESInvMixColumnsUint32x4", argLength: 1, commutative: false}, {name: "AbsInt8x16", argLength: 1, commutative: false}, {name: "AbsInt8x32", argLength: 1, commutative: false}, {name: "AbsInt8x64", argLength: 1, commutative: false}, @@ -1101,6 +1110,7 @@ func simdGenericOps() []opData { {name: "moveMaskedUint16x32", argLength: 2, commutative: false}, {name: "moveMaskedUint32x16", argLength: 2, commutative: false}, {name: "moveMaskedUint64x8", argLength: 2, commutative: false}, + {name: "AESRoundKeyGenAssistUint32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 105d1a803c6..7e44a31956a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1253,6 +1253,15 @@ const ( OpAMD64VADDSUBPD256 OpAMD64VADDSUBPS128 OpAMD64VADDSUBPS256 + OpAMD64VAESDEC128 + OpAMD64VAESDEC256 + OpAMD64VAESDECLAST128 + OpAMD64VAESDECLAST256 + OpAMD64VAESENC128 + OpAMD64VAESENC256 + OpAMD64VAESENCLAST128 + OpAMD64VAESENCLAST256 + OpAMD64VAESIMC128 OpAMD64VBROADCASTSD256 OpAMD64VBROADCASTSD512 OpAMD64VBROADCASTSDMasked256 @@ 
-2316,6 +2325,7 @@ const ( OpAMD64VSUBPSMasked128 OpAMD64VSUBPSMasked256 OpAMD64VSUBPSMasked512 + OpAMD64VAESKEYGENASSIST128 OpAMD64VROUNDPS128 OpAMD64VROUNDPS256 OpAMD64VROUNDPD128 @@ -5401,6 +5411,15 @@ const ( OpCvtMask64x4to8 OpCvtMask64x8to8 OpIsZeroVec + OpAESDecryptLastRoundUint8x16 + OpAESDecryptLastRoundUint8x32 + OpAESDecryptRoundUint8x16 + OpAESDecryptRoundUint8x32 + OpAESEncryptLastRoundUint8x16 + OpAESEncryptLastRoundUint8x32 + OpAESEncryptRoundUint8x16 + OpAESEncryptRoundUint8x32 + OpAESInvMixColumnsUint32x4 OpAbsInt8x16 OpAbsInt8x32 OpAbsInt8x64 @@ -6498,6 +6517,7 @@ const ( OpmoveMaskedUint16x32 OpmoveMaskedUint32x16 OpmoveMaskedUint64x8 + OpAESRoundKeyGenAssistUint32x4 OpCeilScaledFloat32x4 OpCeilScaledFloat32x8 OpCeilScaledFloat32x16 @@ -20088,6 +20108,131 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VAESDEC128", + argLen: 2, + asm: x86.AVAESDEC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VAESDEC256", + argLen: 2, + asm: x86.AVAESDEC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VAESDECLAST128", + argLen: 2, + asm: x86.AVAESDECLAST, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 
X8 X9 X10 X11 X12 X13 X14 X15 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VAESDECLAST256", + argLen: 2, + asm: x86.AVAESDECLAST, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VAESENC128", + argLen: 2, + asm: x86.AVAESENC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VAESENC256", + argLen: 2, + asm: x86.AVAESENC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VAESENCLAST128", + argLen: 2, + asm: x86.AVAESENCLAST, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 
X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VAESENCLAST256", + argLen: 2, + asm: x86.AVAESENCLAST, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VAESIMC128", + argLen: 1, + asm: x86.AVAESIMC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VBROADCASTSD256", argLen: 1, @@ -35714,6 +35859,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VAESKEYGENASSIST128", + auxType: auxUInt8, + argLen: 1, + asm: x86.AVAESKEYGENASSIST, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VROUNDPS128", auxType: auxUInt8, @@ -76061,6 +76220,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "AESDecryptLastRoundUint8x16", + argLen: 2, + generic: true, + }, + { + name: "AESDecryptLastRoundUint8x32", + argLen: 2, + generic: true, + }, + { + name: "AESDecryptRoundUint8x16", + argLen: 2, + generic: true, + }, + { + name: "AESDecryptRoundUint8x32", + argLen: 2, + generic: true, + }, + { + name: "AESEncryptLastRoundUint8x16", + argLen: 2, + generic: true, + }, + { + name: "AESEncryptLastRoundUint8x32", + argLen: 2, + generic: true, + }, + { + name: "AESEncryptRoundUint8x16", + argLen: 2, + 
generic: true, + }, + { + name: "AESEncryptRoundUint8x32", + argLen: 2, + generic: true, + }, + { + name: "AESInvMixColumnsUint32x4", + argLen: 1, + generic: true, + }, { name: "AbsInt8x16", argLen: 1, @@ -81810,6 +82014,12 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "AESRoundKeyGenAssistUint32x4", + auxType: auxUInt8, + argLen: 1, + generic: true, + }, { name: "CeilScaledFloat32x4", auxType: auxUInt8, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index bc611fc44c4..84bb4c11487 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -9,6 +9,36 @@ import "cmd/compile/internal/types" func rewriteValueAMD64(v *Value) bool { switch v.Op { + case OpAESDecryptLastRoundUint8x16: + v.Op = OpAMD64VAESDECLAST128 + return true + case OpAESDecryptLastRoundUint8x32: + v.Op = OpAMD64VAESDECLAST256 + return true + case OpAESDecryptRoundUint8x16: + v.Op = OpAMD64VAESDEC128 + return true + case OpAESDecryptRoundUint8x32: + v.Op = OpAMD64VAESDEC256 + return true + case OpAESEncryptLastRoundUint8x16: + v.Op = OpAMD64VAESENCLAST128 + return true + case OpAESEncryptLastRoundUint8x32: + v.Op = OpAMD64VAESENCLAST256 + return true + case OpAESEncryptRoundUint8x16: + v.Op = OpAMD64VAESENC128 + return true + case OpAESEncryptRoundUint8x32: + v.Op = OpAMD64VAESENC256 + return true + case OpAESInvMixColumnsUint32x4: + v.Op = OpAMD64VAESIMC128 + return true + case OpAESRoundKeyGenAssistUint32x4: + v.Op = OpAMD64VAESKEYGENASSIST128 + return true case OpAMD64ADCQ: return rewriteValueAMD64_OpAMD64ADCQ(v) case OpAMD64ADCQconst: diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index a62b3882c38..f2e82d234cd 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -12,6 +12,16 @@ import ( const simdPackage = "simd" func 
simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { + addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.AESInvMixColumns", opLen1(ssa.OpAESInvMixColumnsUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.AESRoundKeyGenAssist", opLen1Imm8(ssa.OpAESRoundKeyGenAssistUint32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Abs", opLen1(ssa.OpAbsInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Abs", opLen1(ssa.OpAbsInt8x64, types.TypeVec512), sys.AMD64) diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go index de27e89fc2f..4dffeadb228 100644 --- a/src/internal/cpu/cpu.go +++ b/src/internal/cpu/cpu.go @@ -34,6 +34,7 @@ var X86 struct { HasAVX512DQ bool HasAVX512VL bool HasAVX512GFNI bool + HasAVX512VAES bool HasAVX512VNNI bool HasAVX512VBMI bool HasAVX512VBMI2 bool diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go index 
ef1874ad68c..4610ce807ee 100644 --- a/src/internal/cpu/cpu_x86.go +++ b/src/internal/cpu/cpu_x86.go @@ -28,6 +28,7 @@ const ( cpuid_AVX512VBMI2 = 1 << 6 cpuid_SSSE3 = 1 << 9 cpuid_AVX512GFNI = 1 << 8 + cpuid_AVX512VAES = 1 << 9 cpuid_AVX512VNNI = 1 << 11 cpuid_AVX512BITALG = 1 << 12 cpuid_FMA = 1 << 12 @@ -182,6 +183,7 @@ func doinit() { X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ) X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512VBMI) X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2) + X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES) X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI) X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ) X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI) diff --git a/src/simd/_gen/simdgen/gen_simdTypes.go b/src/simd/_gen/simdgen/gen_simdTypes.go index 8944c35cad7..f13be87f7b1 100644 --- a/src/simd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/_gen/simdgen/gen_simdTypes.go @@ -563,7 +563,10 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer { } featureSet := make(map[featureKey]struct{}) for _, op := range ops { - featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{} + if !strings.Contains(op.CPUFeature, ",") { + featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{} + } + // Don't generate feature checks for composite features. } features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 { diff --git a/src/simd/_gen/simdgen/ops/Others/categories.yaml b/src/simd/_gen/simdgen/ops/Others/categories.yaml index 4489f4f403f..dd922fb14b1 100644 --- a/src/simd/_gen/simdgen/ops/Others/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Others/categories.yaml @@ -3,3 +3,47 @@ commutative: false documentation: !string |- // NAME counts the leading zeros of each element in x. +- go: AESEncryptRound + commutative: false + documentation: !string |- + // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197. 
+ // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. + // y is the chunk of w array in use. + // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) +- go: AESEncryptLastRound + commutative: false + documentation: !string |- + // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197. + // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. + // y is the chunk of w array in use. + // result = AddRoundKey((ShiftRows(SubBytes(x))), y) +- go: AESRoundKeyGenAssist + commutative: false + documentation: !string |- + // NAME performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197. + // x is an array of AES words, but only x[1] and x[3] are used. + // rconVal is a value from the Rcon constant array. + // result[0] = SubWord(x[1]) + // result[1] = XOR(SubWord(RotWord(x[1])), rconVal) + // result[2] = SubWord(x[3]) + // result[3] = XOR(SubWord(RotWord(x[3])), rconVal) +- go: AESDecryptRound + commutative: false + documentation: !string |- + // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197. + // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. + // y is the chunk of dw array in use. + // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) +- go: AESDecryptLastRound + commutative: false + documentation: !string |- + // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197. + // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. + // y is the chunk of dw array in use. + // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) +- go: AESInvMixColumns + commutative: false + documentation: !string |- + // NAME performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197. + // x is the chunk of w array in use. 
+ // result = InvMixColumns(x) \ No newline at end of file diff --git a/src/simd/_gen/simdgen/ops/Others/go.yaml b/src/simd/_gen/simdgen/ops/Others/go.yaml index a4fd87407b6..0f8b7b43a26 100644 --- a/src/simd/_gen/simdgen/ops/Others/go.yaml +++ b/src/simd/_gen/simdgen/ops/Others/go.yaml @@ -6,3 +6,50 @@ go: $t out: - *any +- go: AESEncryptRound + asm: VAESENC + in: + - &uint8s + base: uint + overwriteElementBits: 8 + - &uint32s + base: uint + overwriteElementBits: 32 + out: + - *uint8s +- go: AESEncryptLastRound + asm: VAESENCLAST + in: + - *uint8s + - *uint32s + out: + - *uint8s +- go: AESRoundKeyGenAssist + asm: VAESKEYGENASSIST + in: + - *uint32s + - class: immediate + immOffset: 0 + name: rconVal + out: + - *uint32s +- go: AESDecryptRound + asm: VAESDEC + in: + - *uint8s + - *uint32s + out: + - *uint8s +- go: AESDecryptLastRound + asm: VAESDECLAST + in: + - *uint8s + - *uint32s + out: + - *uint8s +- go: AESInvMixColumns + asm: VAESIMC + in: + - *uint32s + out: + - *uint32s \ No newline at end of file diff --git a/src/simd/_gen/simdgen/xed.go b/src/simd/_gen/simdgen/xed.go index e521f0c8d44..1781f5c74d0 100644 --- a/src/simd/_gen/simdgen/xed.go +++ b/src/simd/_gen/simdgen/xed.go @@ -770,6 +770,7 @@ var cpuFeatureMap = map[cpuFeatureKey]string{ {"AVX", ""}: "AVX", {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", {"AVX2", ""}: "AVX2", + {"AVXAES", ""}: "AVX, AES", // AVX-512 foundational features. We combine all of these into one "AVX512" feature. 
{"AVX512EVEX", "AVX512F"}: "AVX512", @@ -786,6 +787,7 @@ var cpuFeatureMap = map[cpuFeatureKey]string{ {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI", {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI", {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ", + {"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES", // AVX 10.2 (not yet supported) {"AVX512EVEX", "AVX10_2_RC"}: "ignore", diff --git a/src/simd/cpu.go b/src/simd/cpu.go index cbde9a8e1ff..2837c76d321 100644 --- a/src/simd/cpu.go +++ b/src/simd/cpu.go @@ -51,6 +51,14 @@ func HasAVX512GFNI() bool { return cpu.X86.HasAVX512GFNI } +// HasAVX512VAES returns whether the CPU supports the AVX512VAES feature. +// +// HasAVX512VAES is defined on all GOARCHes, but will only return true on +// GOARCH amd64. +func HasAVX512VAES() bool { + return cpu.X86.HasAVX512VAES +} + // HasAVX512VBMI returns whether the CPU supports the AVX512VBMI feature. // // HasAVX512VBMI is defined on all GOARCHes, but will only return true on diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 17f45e6bf5e..8956c2e0772 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -4,6 +4,102 @@ package simd +/* AESDecryptLastRound */ + +// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) +// +// Asm: VAESDECLAST, CPU Feature: AVX, AES +func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16 + +// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. 
+// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) +// +// Asm: VAESDECLAST, CPU Feature: AVX512VAES +func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32 + +/* AESDecryptRound */ + +// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) +// +// Asm: VAESDEC, CPU Feature: AVX, AES +func (x Uint8x16) AESDecryptRound(y Uint32x4) Uint8x16 + +// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) +// +// Asm: VAESDEC, CPU Feature: AVX512VAES +func (x Uint8x32) AESDecryptRound(y Uint32x8) Uint8x32 + +/* AESEncryptLastRound */ + +// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey((ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENCLAST, CPU Feature: AVX, AES +func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16 + +// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey((ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENCLAST, CPU Feature: AVX512VAES +func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32 + +/* AESEncryptRound */ + +// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. 
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENC, CPU Feature: AVX, AES +func (x Uint8x16) AESEncryptRound(y Uint32x4) Uint8x16 + +// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENC, CPU Feature: AVX512VAES +func (x Uint8x32) AESEncryptRound(y Uint32x8) Uint8x32 + +/* AESInvMixColumns */ + +// AESInvMixColumns performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197. +// x is the chunk of w array in use. +// result = InvMixColumns(x) +// +// Asm: VAESIMC, CPU Feature: AVX, AES +func (x Uint32x4) AESInvMixColumns() Uint32x4 + +/* AESRoundKeyGenAssist */ + +// AESRoundKeyGenAssist performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197. +// x is an array of AES words, but only x[1] and x[3] are used. +// rconVal is a value from the Rcon constant array. +// result[0] = SubWord(x[1]) +// result[1] = XOR(SubWord(RotWord(x[1])), rconVal) +// result[2] = SubWord(x[3]) +// result[3] = XOR(SubWord(RotWord(x[3])), rconVal) +// +// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES +func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4 + /* Abs */ // Abs computes the absolute value of each element.