mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: add SHA features
This CL also fixed some bugs left in CL 712181. Change-Id: I9cb6cd9fbaef307f352809bf21b8fec3eb62721a Reviewed-on: https://go-review.googlesource.com/c/go/+/712361 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
2b8eded4f4
commit
cf7c1a4cbb
22 changed files with 843 additions and 235 deletions
|
|
@ -1955,6 +1955,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPTERNLOGQ512load:
|
||||
p = simdV31loadResultInArg0Imm8(s, v)
|
||||
|
||||
case ssa.OpAMD64SHA1MSG1128,
|
||||
ssa.OpAMD64SHA1MSG2128,
|
||||
ssa.OpAMD64SHA1NEXTE128,
|
||||
ssa.OpAMD64SHA256MSG1128:
|
||||
p = simdV21ResultInArg0(s, v)
|
||||
|
||||
case ssa.OpAMD64SHA1RNDS4128:
|
||||
p = simdV21ResultInArg0Imm8(s, v)
|
||||
|
||||
case ssa.OpAMD64SHA256RNDS2128:
|
||||
p = simdV31x0AtIn2ResultInArg0(s, v)
|
||||
|
||||
default:
|
||||
// Unknown reg shape
|
||||
return false
|
||||
|
|
|
|||
|
|
@ -2349,6 +2349,32 @@ func simdV2kvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
|||
return p
|
||||
}
|
||||
|
||||
// Example instruction: SHA1NEXTE X2, X2
|
||||
func simdV21ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v.Args[1])
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
return p
|
||||
}
|
||||
|
||||
// Example instruction: SHA1RNDS4 $1, X2, X2
|
||||
func simdV21ResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Offset = int64(v.AuxUInt8())
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.AddRestSourceReg(simdReg(v.Args[1]))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
return p
|
||||
}
|
||||
|
||||
// simdV31x0AtIn2ResultInArg0 emits a three-input, result-in-arg0 vector
// instruction whose third input (index 2) is pinned to X0.
//
// No special handling of X0 is done here: the v31x0AtIn2 register shape
// already restricts that input to the X0 register mask, so the emission
// is delegated unchanged to simdV31ResultInArg0.
//
// Example instruction: SHA256RNDS2 X0, X11, X2
func simdV31x0AtIn2ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	return simdV31ResultInArg0(s, v)
}
|
||||
|
||||
var blockJump = [...]struct {
|
||||
asm, invasm obj.As
|
||||
}{
|
||||
|
|
|
|||
|
|
@ -135,6 +135,7 @@ func init() {
|
|||
|
||||
vz = v | x15
|
||||
wz = w | x15
|
||||
x0 = buildReg("X0")
|
||||
)
|
||||
// Common slices of register masks
|
||||
var (
|
||||
|
|
@ -213,7 +214,7 @@ func init() {
|
|||
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
|
||||
|
||||
v11 = regInfo{inputs: vzonly, outputs: vonly}
|
||||
v21 = regInfo{inputs: []regMask{vz, vz}, outputs: vonly}
|
||||
v21 = regInfo{inputs: []regMask{v, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
vk = regInfo{inputs: vzonly, outputs: maskonly}
|
||||
kv = regInfo{inputs: maskonly, outputs: vonly}
|
||||
v2k = regInfo{inputs: []regMask{vz, vz}, outputs: maskonly}
|
||||
|
|
@ -247,17 +248,18 @@ func init() {
|
|||
|
||||
// These register masks are used by SIMD only, they follow the pattern:
|
||||
// Mem last, k mask second to last (if any), address right before mem and k mask.
|
||||
wkwload = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: wonly}
|
||||
v21load = regInfo{inputs: []regMask{vz, gpspsb, 0}, outputs: vonly}
|
||||
v31load = regInfo{inputs: []regMask{v, vz, gpspsb, 0}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
v11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: vonly}
|
||||
w21load = regInfo{inputs: []regMask{wz, gpspsb, 0}, outputs: wonly}
|
||||
w31load = regInfo{inputs: []regMask{w, wz, gpspsb, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
w2kload = regInfo{inputs: []regMask{wz, gpspsb, 0}, outputs: maskonly}
|
||||
w2kwload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: wonly}
|
||||
w11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: wonly}
|
||||
w3kwload = regInfo{inputs: []regMask{w, wz, gpspsb, mask, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
w2kkload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: maskonly}
|
||||
wkwload = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: wonly}
|
||||
v21load = regInfo{inputs: []regMask{v, gpspsb, 0}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
v31load = regInfo{inputs: []regMask{v, vz, gpspsb, 0}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
v11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: vonly}
|
||||
w21load = regInfo{inputs: []regMask{wz, gpspsb, 0}, outputs: wonly}
|
||||
w31load = regInfo{inputs: []regMask{w, wz, gpspsb, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
w2kload = regInfo{inputs: []regMask{wz, gpspsb, 0}, outputs: maskonly}
|
||||
w2kwload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: wonly}
|
||||
w11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: wonly}
|
||||
w3kwload = regInfo{inputs: []regMask{w, wz, gpspsb, mask, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
w2kkload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: maskonly}
|
||||
v31x0AtIn2 = regInfo{inputs: []regMask{v, vz, x0}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
|
||||
kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
|
||||
kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
|
||||
|
|
@ -1477,7 +1479,7 @@ func init() {
|
|||
genSIMDfile: "../../amd64/simdssa.go",
|
||||
ops: append(AMD64ops, simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv,
|
||||
w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, wkwload, v21load, v31load, v11load,
|
||||
w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload)...), // AMD64ops,
|
||||
w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload, v31x0AtIn2)...), // AMD64ops,
|
||||
blocks: AMD64blocks,
|
||||
regnames: regNamesAMD64,
|
||||
ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11",
|
||||
|
|
|
|||
|
|
@ -939,6 +939,20 @@
|
|||
(RoundToEvenScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
|
||||
(RoundToEvenScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
|
||||
(RoundToEvenScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
|
||||
(SHA1Msg1Int32x4 ...) => (SHA1MSG1128 ...)
|
||||
(SHA1Msg1Uint32x4 ...) => (SHA1MSG1128 ...)
|
||||
(SHA1Msg2Int32x4 ...) => (SHA1MSG2128 ...)
|
||||
(SHA1Msg2Uint32x4 ...) => (SHA1MSG2128 ...)
|
||||
(SHA1NextEInt32x4 ...) => (SHA1NEXTE128 ...)
|
||||
(SHA1NextEUint32x4 ...) => (SHA1NEXTE128 ...)
|
||||
(SHA1Round4Int32x4 ...) => (SHA1RNDS4128 ...)
|
||||
(SHA1Round4Uint32x4 ...) => (SHA1RNDS4128 ...)
|
||||
(SHA256Msg1Int32x4 ...) => (SHA256MSG1128 ...)
|
||||
(SHA256Msg1Uint32x4 ...) => (SHA256MSG1128 ...)
|
||||
(SHA256Msg2Int32x4 ...) => (SHA256MSG1128 ...)
|
||||
(SHA256Msg2Uint32x4 ...) => (SHA256MSG1128 ...)
|
||||
(SHA256Rounds2Int32x4 ...) => (SHA256RNDS2128 ...)
|
||||
(SHA256Rounds2Uint32x4 ...) => (SHA256RNDS2128 ...)
|
||||
(ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
|
||||
(ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
|
||||
(ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
|
||||
|
|
|
|||
|
|
@ -3,8 +3,13 @@
|
|||
package main
|
||||
|
||||
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
|
||||
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload regInfo) []opData {
|
||||
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload, v31x0AtIn2 regInfo) []opData {
|
||||
return []opData{
|
||||
{name: "SHA1MSG1128", argLength: 2, reg: v21, asm: "SHA1MSG1", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "SHA1MSG2128", argLength: 2, reg: v21, asm: "SHA1MSG2", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "SHA1NEXTE128", argLength: 2, reg: v21, asm: "SHA1NEXTE", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "SHA256MSG1128", argLength: 2, reg: v21, asm: "SHA256MSG1", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "SHA256RNDS2128", argLength: 3, reg: v31x0AtIn2, asm: "SHA256RNDS2", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VADDPD128", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VADDPD256", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VADDPD512", argLength: 2, reg: w21, asm: "VADDPD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
|
|
@ -1216,6 +1221,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "SHA1RNDS4128", argLength: 2, reg: v21, asm: "SHA1RNDS4", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPERM2F128256", argLength: 2, reg: v21, asm: "VPERM2F128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPERM2I128256", argLength: 2, reg: v21, asm: "VPERM2I128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -844,6 +844,18 @@ func simdGenericOps() []opData {
|
|||
{name: "RoundToEvenFloat32x8", argLength: 1, commutative: false},
|
||||
{name: "RoundToEvenFloat64x2", argLength: 1, commutative: false},
|
||||
{name: "RoundToEvenFloat64x4", argLength: 1, commutative: false},
|
||||
{name: "SHA1Msg1Int32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA1Msg1Uint32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA1Msg2Int32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA1Msg2Uint32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA1NextEInt32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA1NextEUint32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA256Msg1Int32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA256Msg1Uint32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA256Msg2Int32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA256Msg2Uint32x4", argLength: 2, commutative: false},
|
||||
{name: "SHA256Rounds2Int32x4", argLength: 3, commutative: false},
|
||||
{name: "SHA256Rounds2Uint32x4", argLength: 3, commutative: false},
|
||||
{name: "ScaleFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "ScaleFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "ScaleFloat32x16", argLength: 2, commutative: false},
|
||||
|
|
@ -1206,6 +1218,8 @@ func simdGenericOps() []opData {
|
|||
{name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "SHA1Round4Int32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SHA1Round4Uint32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -4978,6 +4978,48 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpRsh8x64(v)
|
||||
case OpRsh8x8:
|
||||
return rewriteValueAMD64_OpRsh8x8(v)
|
||||
case OpSHA1Msg1Int32x4:
|
||||
v.Op = OpAMD64SHA1MSG1128
|
||||
return true
|
||||
case OpSHA1Msg1Uint32x4:
|
||||
v.Op = OpAMD64SHA1MSG1128
|
||||
return true
|
||||
case OpSHA1Msg2Int32x4:
|
||||
v.Op = OpAMD64SHA1MSG2128
|
||||
return true
|
||||
case OpSHA1Msg2Uint32x4:
|
||||
v.Op = OpAMD64SHA1MSG2128
|
||||
return true
|
||||
case OpSHA1NextEInt32x4:
|
||||
v.Op = OpAMD64SHA1NEXTE128
|
||||
return true
|
||||
case OpSHA1NextEUint32x4:
|
||||
v.Op = OpAMD64SHA1NEXTE128
|
||||
return true
|
||||
case OpSHA1Round4Int32x4:
|
||||
v.Op = OpAMD64SHA1RNDS4128
|
||||
return true
|
||||
case OpSHA1Round4Uint32x4:
|
||||
v.Op = OpAMD64SHA1RNDS4128
|
||||
return true
|
||||
case OpSHA256Msg1Int32x4:
|
||||
v.Op = OpAMD64SHA256MSG1128
|
||||
return true
|
||||
case OpSHA256Msg1Uint32x4:
|
||||
v.Op = OpAMD64SHA256MSG1128
|
||||
return true
|
||||
case OpSHA256Msg2Int32x4:
|
||||
v.Op = OpAMD64SHA256MSG1128
|
||||
return true
|
||||
case OpSHA256Msg2Uint32x4:
|
||||
v.Op = OpAMD64SHA256MSG1128
|
||||
return true
|
||||
case OpSHA256Rounds2Int32x4:
|
||||
v.Op = OpAMD64SHA256RNDS2128
|
||||
return true
|
||||
case OpSHA256Rounds2Uint32x4:
|
||||
v.Op = OpAMD64SHA256RNDS2128
|
||||
return true
|
||||
case OpScaleFloat32x16:
|
||||
v.Op = OpAMD64VSCALEFPS512
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -1987,6 +1987,19 @@ func opLen2Imm8_II(op ssa.Op, t *types.Type, _ int) func(s *state, n *ir.CallExp
|
|||
}
|
||||
}
|
||||
|
||||
// The assembler requires the imm value of a SHA1RNDS4 instruction to be one of 0,1,2,3...
|
||||
func opLen2Imm8_SHA1RNDS4(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
if args[1].Op == ssa.OpConst8 {
|
||||
return s.newValue2I(op, t, (args[1].AuxInt<<int64(offset))&0b11, args[0], args[2])
|
||||
}
|
||||
return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
|
||||
// Encode as int8 due to requirement of AuxInt, check its comment for details.
|
||||
s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx<<offset))&0b11, args[0], args[2])
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func opLen3Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
if args[2].Op == ssa.OpConst8 {
|
||||
|
|
|
|||
|
|
@ -951,6 +951,20 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SHA1Msg1", opLen2(ssa.OpSHA1Msg1Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SHA1Msg1", opLen2(ssa.OpSHA1Msg1Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SHA1Msg2", opLen2(ssa.OpSHA1Msg2Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SHA1Msg2", opLen2(ssa.OpSHA1Msg2Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SHA1NextE", opLen2(ssa.OpSHA1NextEInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SHA1NextE", opLen2(ssa.OpSHA1NextEUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SHA1Round4", opLen2Imm8_SHA1RNDS4(ssa.OpSHA1Round4Int32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SHA1Round4", opLen2Imm8_SHA1RNDS4(ssa.OpSHA1Round4Uint32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SHA256Msg1", opLen2(ssa.OpSHA256Msg1Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SHA256Msg1", opLen2(ssa.OpSHA256Msg1Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SHA256Msg2", opLen2(ssa.OpSHA256Msg2Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SHA256Msg2", opLen2(ssa.OpSHA256Msg2Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SHA256Rounds2", opLen3(ssa.OpSHA256Rounds2Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SHA256Rounds2", opLen3(ssa.OpSHA256Rounds2Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -58,6 +58,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
{{end}}
|
||||
{{define "op2Imm8_II"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_II(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
{{end}}
|
||||
{{define "op2Imm8_SHA1RNDS4"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_SHA1RNDS4(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
{{end}}
|
||||
{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
{{end}}
|
||||
{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ const simdMachineOpsTmpl = `
|
|||
package main
|
||||
|
||||
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
|
||||
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload regInfo) []opData {
|
||||
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload, v31x0AtIn2 regInfo) []opData {
|
||||
return []opData{
|
||||
{{- range .OpsData }}
|
||||
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
|
||||
|
|
@ -61,7 +61,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
|||
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
|
||||
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true,
|
||||
"wkwload": true, "v21load": true, "v31load": true, "v11load": true, "w21load": true, "w31load": true, "w2kload": true, "w2kwload": true, "w11load": true,
|
||||
"w3kwload": true, "w2kkload": true}
|
||||
"w3kwload": true, "w2kkload": true, "v31x0AtIn2": true}
|
||||
opsData := make([]opData, 0)
|
||||
opsDataImm := make([]opData, 0)
|
||||
opsDataLoad := make([]opData, 0)
|
||||
|
|
|
|||
|
|
@ -352,6 +352,15 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin
|
|||
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
|
||||
{{end}}
|
||||
|
||||
{{define "op2Imm8_SHA1RNDS4"}}
|
||||
{{if .Documentation}}{{.Documentation}}
|
||||
//{{end}}
|
||||
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
|
||||
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
|
||||
{{end}}
|
||||
|
||||
{{define "op3Imm8"}}
|
||||
{{if .Documentation}}{{.Documentation}}
|
||||
//{{end}}
|
||||
|
|
|
|||
|
|
@ -96,6 +96,9 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
|||
"v2kvloadImm8",
|
||||
"v31ResultInArg0Imm8",
|
||||
"v31loadResultInArg0Imm8",
|
||||
"v21ResultInArg0",
|
||||
"v21ResultInArg0Imm8",
|
||||
"v31x0AtIn2ResultInArg0",
|
||||
}
|
||||
regInfoSet := map[string][]string{}
|
||||
for _, key := range regInfoKeys {
|
||||
|
|
|
|||
|
|
@ -236,9 +236,9 @@ func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskS
|
|||
// regShape returns a string representation of the register shape.
|
||||
func (op *Operation) regShape(mem memShape) (string, error) {
|
||||
_, _, _, _, gOp := op.shape()
|
||||
var regInfo string
|
||||
var regInfo, fixedName string
|
||||
var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt, memInCnt, memOutCnt int
|
||||
for _, in := range gOp.In {
|
||||
for i, in := range gOp.In {
|
||||
switch in.Class {
|
||||
case "vreg":
|
||||
vRegInCnt++
|
||||
|
|
@ -253,8 +253,11 @@ func (op *Operation) regShape(mem memShape) (string, error) {
|
|||
memInCnt++
|
||||
vRegInCnt++
|
||||
}
|
||||
if in.FixedReg != nil {
|
||||
fixedName = fmt.Sprintf("%sAtIn%d", *in.FixedReg, i)
|
||||
}
|
||||
}
|
||||
for _, out := range gOp.Out {
|
||||
for i, out := range gOp.Out {
|
||||
// If class overwrite is happening, that's not really a mask but a vreg.
|
||||
if out.Class == "vreg" || out.OverwriteClass != nil {
|
||||
vRegOutCnt++
|
||||
|
|
@ -269,6 +272,9 @@ func (op *Operation) regShape(mem memShape) (string, error) {
|
|||
vRegOutCnt++
|
||||
memOutCnt++
|
||||
}
|
||||
if out.FixedReg != nil {
|
||||
fixedName = fmt.Sprintf("%sAtIn%d", *out.FixedReg, i)
|
||||
}
|
||||
}
|
||||
var inRegs, inMasks, outRegs, outMasks string
|
||||
|
||||
|
|
@ -309,6 +315,7 @@ func (op *Operation) regShape(mem memShape) (string, error) {
|
|||
if memOutCnt > 0 {
|
||||
panic("simdgen does not understand memory as output as of now")
|
||||
}
|
||||
regInfo += fixedName
|
||||
return regInfo, nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -256,6 +256,8 @@ type Operand struct {
|
|||
// because Intel's XED data is inconsistent. e.g. AVX512 VPMADDUBSW marks its operand
|
||||
// elemBits 16, which should be 8.
|
||||
OverwriteElementBits *int
|
||||
// FixedReg is the name of the fixed registers
|
||||
FixedReg *string
|
||||
}
|
||||
|
||||
// isDigit returns true if the byte is an ASCII digit.
|
||||
|
|
|
|||
|
|
@ -92,8 +92,9 @@ import (
|
|||
"slices"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
"simd/_gen/unify"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -199,6 +200,15 @@ func main() {
|
|||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Validate results.
|
||||
//
|
||||
// Don't validate if this is a command-line query because that tends to
|
||||
// eliminate lots of required defs and is used in cases where maybe defs
|
||||
// aren't enumerable anyway.
|
||||
if *flagQ == "" && len(must) > 0 {
|
||||
validate(unified, must)
|
||||
}
|
||||
|
||||
// Print results.
|
||||
switch *flagO {
|
||||
case "yaml":
|
||||
|
|
@ -228,15 +238,6 @@ func main() {
|
|||
fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate results.
|
||||
//
|
||||
// Don't validate if this is a command-line query because that tends to
|
||||
// eliminate lots of required defs and is used in cases where maybe defs
|
||||
// aren't enumerable anyway.
|
||||
if *flagQ == "" && len(must) > 0 {
|
||||
validate(unified, must)
|
||||
}
|
||||
}
|
||||
|
||||
func validate(cl unify.Closure, required map[*unify.Value]struct{}) {
|
||||
|
|
|
|||
|
|
@ -46,4 +46,63 @@
|
|||
documentation: !string |-
|
||||
// NAME performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
|
||||
// x is the chunk of w array in use.
|
||||
// result = InvMixColumns(x)
|
||||
// result = InvMixColumns(x)
|
||||
- go: SHA1Round4
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x contains the state variables a, b, c and d from upper to lower order.
|
||||
// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order.
|
||||
// result = the state variables a', b', c', d' updated after 4 rounds.
|
||||
// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop.
|
||||
- go: SHA1NextE
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x contains the state variable a (before the 4 rounds), placed in the upper element.
|
||||
// y is the elements of W array for next 4 rounds from upper to lower order.
|
||||
// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element,
|
||||
// from upper to lower order.
|
||||
// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better off specifying H4:0:0:0
|
||||
// for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the
|
||||
// computation of the value of e'.)
|
||||
- go: SHA1Msg1
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x = {W3, W2, W1, W0}
|
||||
// y = {0, 0, W5, W4}
|
||||
// result = {W3^W5, W2^W4, W1^W3, W0^W2}.
|
||||
- go: SHA1Msg2
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x = result of 2.
|
||||
// y = {W15, W14, W13}
|
||||
// result = {W19, W18, W17, W16}
|
||||
- go: SHA256Rounds2
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = {h, g, d, c}
|
||||
// y = {f, e, b, a}
|
||||
// z = {W0+K0, W1+K1}
|
||||
// result = {f', e', b', a'}
|
||||
// The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to
|
||||
// the corresponding element of the W array to make the input data z.
|
||||
// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data
|
||||
// y (the state variables a, b, e, f before the 2 rounds).
|
||||
- go: SHA256Msg1
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = {W0, W1, W2, W3}
|
||||
// y = {W4, 0, 0, 0}
|
||||
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
|
||||
- go: SHA256Msg2
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = result of 2
|
||||
// y = {0, 0, W14, W15}
|
||||
// result = {W16, W17, W18, W19}
|
||||
|
|
@ -52,4 +52,45 @@
|
|||
in:
|
||||
- *uint32s
|
||||
out:
|
||||
- *uint32s
|
||||
- *uint32s
|
||||
- go: SHA1Round4
|
||||
asm: SHA1RNDS4
|
||||
operandOrder: "SHA1RNDS4"
|
||||
in: &2any1imm
|
||||
- *any
|
||||
- *any
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
out: &1any
|
||||
- *any
|
||||
- go: SHA1NextE
|
||||
asm: SHA1NEXTE
|
||||
in: &2any
|
||||
- *any
|
||||
- *any
|
||||
out: *1any
|
||||
- go: SHA1Msg1
|
||||
asm: SHA1MSG1
|
||||
in: *2any
|
||||
out: *1any
|
||||
- go: SHA1Msg2
|
||||
asm: SHA1MSG2
|
||||
in: *2any
|
||||
out: *1any
|
||||
- go: SHA256Rounds2
|
||||
asm: SHA256RNDS2
|
||||
in:
|
||||
- base: $t
|
||||
- base: $t
|
||||
- base: $t
|
||||
overwriteElementBits: 32
|
||||
out:
|
||||
- base: $t
|
||||
- go: SHA256Msg1
|
||||
asm: SHA256MSG1
|
||||
in: *2any
|
||||
out: *1any
|
||||
# SHA256Msg2 is the final step of the SHA256 message schedule and lowers to
# SHA256MSG2 (SHA256MSG1 is the separate intermediate step used by SHA256Msg1).
- go: SHA256Msg2
  asm: SHA256MSG2
  in: *2any
  out: *1any
|
||||
|
|
@ -25,7 +25,6 @@ const (
|
|||
NOT_REG_CLASS = iota // not a register
|
||||
VREG_CLASS // classify as a vector register; see
|
||||
GREG_CLASS // classify as a general register
|
||||
REG_FIXED // classify as a fixed register
|
||||
)
|
||||
|
||||
// instVariant is a bitmap indicating a variant of an instruction that has
|
||||
|
|
@ -852,7 +851,7 @@ type fixedReg struct {
|
|||
}
|
||||
|
||||
var fixedRegMap = map[string]fixedReg{
|
||||
"XED_REG_XMM0": {REG_FIXED, "XMM0", 128},
|
||||
"XED_REG_XMM0": {VREG_CLASS, "x0", 128},
|
||||
}
|
||||
|
||||
// decodeReg returns class (NOT_REG_CLASS, VREG_CLASS, GREG_CLASS, VREG_CLASS_FIXED,
|
||||
|
|
|
|||
|
|
@ -106,3 +106,11 @@ func HasAVX512VPOPCNTDQ() bool {
|
|||
func HasAVXVNNI() bool {
|
||||
return cpu.X86.HasAVXVNNI
|
||||
}
|
||||
|
||||
// HasSHA returns whether the CPU supports the SHA feature.
//
// The result is the value of cpu.X86.HasSHA. Methods documented with
// "CPU Feature: SHA" (the SHA1* and SHA256* operations) require it.
//
// HasSHA is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func HasSHA() bool {
	return cpu.X86.HasSHA
}
|
||||
|
|
|
|||
|
|
@ -5623,6 +5623,156 @@ func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4
|
|||
// Asm: VREDUCEPD, CPU Feature: AVX512
|
||||
func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8
|
||||
|
||||
/* SHA1Msg1 */
|
||||
|
||||
// SHA1Msg1 does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x = {W3, W2, W1, W0}
|
||||
// y = {0, 0, W5, W4}
|
||||
// result = {W3^W5, W2^W4, W1^W3, W0^W2}.
|
||||
//
|
||||
// Asm: SHA1MSG1, CPU Feature: SHA
|
||||
func (x Int32x4) SHA1Msg1(y Int32x4) Int32x4
|
||||
|
||||
// SHA1Msg1 does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x = {W3, W2, W1, W0}
|
||||
// y = {0, 0, W5, W4}
|
||||
// result = {W3^W5, W2^W4, W1^W3, W0^W2}.
|
||||
//
|
||||
// Asm: SHA1MSG1, CPU Feature: SHA
|
||||
func (x Uint32x4) SHA1Msg1(y Uint32x4) Uint32x4
|
||||
|
||||
/* SHA1Msg2 */
|
||||
|
||||
// SHA1Msg2 does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x = result of 2.
|
||||
// y = {W15, W14, W13}
|
||||
// result = {W19, W18, W17, W16}
|
||||
//
|
||||
// Asm: SHA1MSG2, CPU Feature: SHA
|
||||
func (x Int32x4) SHA1Msg2(y Int32x4) Int32x4
|
||||
|
||||
// SHA1Msg2 does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x = result of 2.
|
||||
// y = {W15, W14, W13}
|
||||
// result = {W19, W18, W17, W16}
|
||||
//
|
||||
// Asm: SHA1MSG2, CPU Feature: SHA
|
||||
func (x Uint32x4) SHA1Msg2(y Uint32x4) Uint32x4
|
||||
|
||||
/* SHA1NextE */
|
||||
|
||||
// SHA1NextE calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x contains the state variable a (before the 4 rounds), placed in the upper element.
|
||||
// y is the elements of W array for next 4 rounds from upper to lower order.
|
||||
// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element,
|
||||
// from upper to lower order.
|
||||
// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better off specifying H4:0:0:0
|
||||
// for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the
|
||||
// computation of the value of e'.)
|
||||
//
|
||||
// Asm: SHA1NEXTE, CPU Feature: SHA
|
||||
func (x Int32x4) SHA1NextE(y Int32x4) Int32x4
|
||||
|
||||
// SHA1NextE calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x contains the state variable a (before the 4 rounds), placed in the upper element.
|
||||
// y is the elements of W array for next 4 rounds from upper to lower order.
|
||||
// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element,
|
||||
// from upper to lower order.
|
||||
// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or, better, specify H4:0:0:0
|
||||
// for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the
|
||||
// computation of the value of e'.)
|
||||
//
|
||||
// Asm: SHA1NEXTE, CPU Feature: SHA
|
||||
func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4
|
||||
|
||||
/* SHA1Round4 */
|
||||
|
||||
// SHA1Round4 performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x contains the state variables a, b, c and d from upper to lower order.
|
||||
// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order.
|
||||
// result = the state variables a', b', c', d' updated after 4 rounds.
|
||||
// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop.
|
||||
//
|
||||
// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: SHA1RNDS4, CPU Feature: SHA
|
||||
func (x Int32x4) SHA1Round4(constant uint8, y Int32x4) Int32x4
|
||||
|
||||
// SHA1Round4 performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4.
|
||||
// x contains the state variables a, b, c and d from upper to lower order.
|
||||
// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order.
|
||||
// result = the state variables a', b', c', d' updated after 4 rounds.
|
||||
// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop.
|
||||
//
|
||||
// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: SHA1RNDS4, CPU Feature: SHA
|
||||
func (x Uint32x4) SHA1Round4(constant uint8, y Uint32x4) Uint32x4
|
||||
|
||||
/* SHA256Msg1 */
|
||||
|
||||
// SHA256Msg1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = {W0, W1, W2, W3}
|
||||
// y = {W4, 0, 0, 0}
|
||||
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
|
||||
//
|
||||
// Asm: SHA256MSG1, CPU Feature: SHA
|
||||
func (x Int32x4) SHA256Msg1(y Int32x4) Int32x4
|
||||
|
||||
// SHA256Msg1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = {W0, W1, W2, W3}
|
||||
// y = {W4, 0, 0, 0}
|
||||
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
|
||||
//
|
||||
// Asm: SHA256MSG1, CPU Feature: SHA
|
||||
func (x Uint32x4) SHA256Msg1(y Uint32x4) Uint32x4
|
||||
|
||||
/* SHA256Msg2 */
|
||||
|
||||
// SHA256Msg2 does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = result of 2
|
||||
// y = {0, 0, W14, W15}
|
||||
// result = {W16, W17, W18, W19}
|
||||
//
|
||||
// Asm: SHA256MSG1, CPU Feature: SHA
|
||||
func (x Int32x4) SHA256Msg2(y Int32x4) Int32x4
|
||||
|
||||
// SHA256Msg2 does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = result of 2
|
||||
// y = {0, 0, W14, W15}
|
||||
// result = {W16, W17, W18, W19}
|
||||
//
|
||||
// Asm: SHA256MSG1, CPU Feature: SHA
|
||||
func (x Uint32x4) SHA256Msg2(y Uint32x4) Uint32x4
|
||||
|
||||
/* SHA256Rounds2 */
|
||||
|
||||
// SHA256Rounds2 does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = {h, g, d, c}
|
||||
// y = {f, e, b, a}
|
||||
// z = {W0+K0, W1+K1}
|
||||
// result = {f', e', b', a'}
|
||||
// The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to
|
||||
// the corresponding element of the W array to make the input data z.
|
||||
// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data
|
||||
// y (the state variables a, b, e, f before the 2 rounds).
|
||||
//
|
||||
// Asm: SHA256RNDS2, CPU Feature: SHA
|
||||
func (x Int32x4) SHA256Rounds2(y Int32x4, z Int32x4) Int32x4
|
||||
|
||||
// SHA256Rounds2 does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = {h, g, d, c}
|
||||
// y = {f, e, b, a}
|
||||
// z = {W0+K0, W1+K1}
|
||||
// result = {f', e', b', a'}
|
||||
// The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to
|
||||
// the corresponding element of the W array to make the input data z.
|
||||
// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data
|
||||
// y (the state variables a, b, e, f before the 2 rounds).
|
||||
//
|
||||
// Asm: SHA256RNDS2, CPU Feature: SHA
|
||||
func (x Uint32x4) SHA256Rounds2(y Uint32x4, z Uint32x4) Uint32x4
|
||||
|
||||
/* Scale */
|
||||
|
||||
// Scale multiplies elements by a power of 2.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue