diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 1ab4c88cba7..c535734bd52 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -44,9 +44,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VCVTTPS2DQ128, ssa.OpAMD64VCVTTPS2DQ256, ssa.OpAMD64VCVTTPS2DQ512, + ssa.OpAMD64VPMOVZXBW256, + ssa.OpAMD64VPMOVZXBW512, + ssa.OpAMD64VPMOVZXBW128, ssa.OpAMD64VCVTPS2UDQ128, ssa.OpAMD64VCVTPS2UDQ256, ssa.OpAMD64VCVTPS2UDQ512, + ssa.OpAMD64VPMOVZXWD256, + ssa.OpAMD64VPMOVZXWD512, + ssa.OpAMD64VPMOVZXWD128, ssa.OpAMD64VPOPCNTB128, ssa.OpAMD64VPOPCNTB256, ssa.OpAMD64VPOPCNTB512, @@ -679,9 +685,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, ssa.OpAMD64VCVTTPS2DQMasked512, + ssa.OpAMD64VPMOVZXBWMasked256, + ssa.OpAMD64VPMOVZXBWMasked512, + ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked256, ssa.OpAMD64VCVTPS2UDQMasked512, + ssa.OpAMD64VPMOVZXWDMasked256, + ssa.OpAMD64VPMOVZXWDMasked512, + ssa.OpAMD64VPMOVZXWDMasked128, ssa.OpAMD64VEXPANDPSMasked128, ssa.OpAMD64VEXPANDPSMasked256, ssa.OpAMD64VEXPANDPSMasked512, @@ -1289,9 +1301,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, ssa.OpAMD64VCVTTPS2DQMasked512, + ssa.OpAMD64VPMOVZXBWMasked256, + ssa.OpAMD64VPMOVZXBWMasked512, + ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked256, ssa.OpAMD64VCVTPS2UDQMasked512, + ssa.OpAMD64VPMOVZXWDMasked256, + ssa.OpAMD64VPMOVZXWDMasked512, + ssa.OpAMD64VPMOVZXWDMasked128, ssa.OpAMD64VDIVPSMasked128, ssa.OpAMD64VDIVPSMasked256, ssa.OpAMD64VDIVPSMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index cfe0075986f..f2bb1ffb009 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -214,9 +214,15 @@ (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...) (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...) (ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...) +(ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...) +(ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...) +(ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...) (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...) (ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...) (ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...) +(ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...) +(ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...) +(ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) (CopySignInt8x16 ...) => (VPSIGNB128 ...) (CopySignInt8x32 ...) => (VPSIGNB256 ...) (CopySignInt16x8 ...) => (VPSIGNW128 ...) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index ba73453ffe1..c87978cd0d7 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -542,6 +542,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXBW128", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXBW256", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXBW512", argLength: 1, reg: w11, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXWD128", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXWD256", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXWD512", argLength: 1, reg: w11, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXWDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 08bfe369511..4d48e4b16ec 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -206,9 +206,15 @@ func simdGenericOps() []opData { {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x16", argLength: 1, commutative: false}, + {name: "ConvertToUint16Uint8x16", argLength: 1, commutative: false}, + {name: "ConvertToUint16Uint8x32", argLength: 1, commutative: false}, + {name: "ConvertToUint16x8Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x16", argLength: 1, commutative: false}, + {name: "ConvertToUint32Uint16x8", argLength: 1, commutative: false}, + {name: "ConvertToUint32Uint16x16", argLength: 1, commutative: false}, + {name: "ConvertToUint32x4Uint16x8", argLength: 1, commutative: false}, {name: "CopySignInt8x16", argLength: 2, commutative: false}, {name: "CopySignInt8x32", argLength: 2, commutative: false}, {name: "CopySignInt16x8", argLength: 2, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 9f6e10c95cb..5379dfdb191 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1765,6 +1765,18 @@ const ( OpAMD64VPMINUWMasked128 OpAMD64VPMINUWMasked256 OpAMD64VPMINUWMasked512 + OpAMD64VPMOVZXBW128 + OpAMD64VPMOVZXBW256 + OpAMD64VPMOVZXBW512 + OpAMD64VPMOVZXBWMasked128 + OpAMD64VPMOVZXBWMasked256 + OpAMD64VPMOVZXBWMasked512 + OpAMD64VPMOVZXWD128 + OpAMD64VPMOVZXWD256 + OpAMD64VPMOVZXWD512 + OpAMD64VPMOVZXWDMasked128 + OpAMD64VPMOVZXWDMasked256 + OpAMD64VPMOVZXWDMasked512 OpAMD64VPMULDQ128 OpAMD64VPMULDQ256 OpAMD64VPMULHUW128 @@ -4838,9 +4850,15 @@ const ( OpConvertToInt32Float32x4 OpConvertToInt32Float32x8 OpConvertToInt32Float32x16 + OpConvertToUint16Uint8x16 + OpConvertToUint16Uint8x32 + OpConvertToUint16x8Uint8x16 OpConvertToUint32Float32x4 OpConvertToUint32Float32x8 OpConvertToUint32Float32x16 + OpConvertToUint32Uint16x8 + OpConvertToUint32Uint16x16 + OpConvertToUint32x4Uint16x8 OpCopySignInt8x16 OpCopySignInt8x32 OpCopySignInt16x8 @@ -26824,6 +26842,168 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMOVZXBW128", + argLen: 1, + asm: x86.AVPMOVZXBW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBW256", + argLen: 1, + asm: x86.AVPMOVZXBW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBW512", + argLen: 1, + asm: x86.AVPMOVZXBW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVZXBWMasked128", + argLen: 2, + asm: x86.AVPMOVZXBW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBWMasked256", + argLen: 2, + asm: x86.AVPMOVZXBW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBWMasked512", + argLen: 2, + asm: x86.AVPMOVZXBW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWD128", + argLen: 1, + asm: x86.AVPMOVZXWD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWD256", + argLen: 1, + asm: x86.AVPMOVZXWD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWD512", + argLen: 1, + asm: x86.AVPMOVZXWD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVZXWDMasked128", + argLen: 2, + asm: x86.AVPMOVZXWD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWDMasked256", + argLen: 2, + asm: x86.AVPMOVZXWD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWDMasked512", + argLen: 2, + asm: x86.AVPMOVZXWD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMULDQ128", argLen: 2, @@ -64008,6 +64188,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToUint16Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint16Uint8x32", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint16x8Uint8x16", + argLen: 1, + generic: true, + }, { name: "ConvertToUint32Float32x4", argLen: 1, @@ -64023,6 +64218,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToUint32Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint32Uint16x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint32x4Uint16x8", + argLen: 1, + generic: true, + }, { name: "CopySignInt8x16", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 87b1e0586d7..2b2df15bc12 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1333,6 +1333,15 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToInt32Float32x8: v.Op = OpAMD64VCVTTPS2DQ256 return true + case OpConvertToUint16Uint8x16: + v.Op = OpAMD64VPMOVZXBW256 + return true + case OpConvertToUint16Uint8x32: + v.Op = OpAMD64VPMOVZXBW512 + return true + case OpConvertToUint16x8Uint8x16: + v.Op = OpAMD64VPMOVZXBW128 + return true case OpConvertToUint32Float32x16: v.Op = OpAMD64VCVTPS2UDQ512 return true @@ -1342,6 +1351,15 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToUint32Float32x8: v.Op = OpAMD64VCVTPS2UDQ256 return true + case OpConvertToUint32Uint16x16: + v.Op = OpAMD64VPMOVZXWD512 + return true + case OpConvertToUint32Uint16x8: + v.Op = OpAMD64VPMOVZXWD256 + return true + case OpConvertToUint32x4Uint16x8: + v.Op = OpAMD64VPMOVZXWD128 + return true case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 return true diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index e6c6874bddc..a519b7d5b3e 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -226,9 +226,15 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x32.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64) diff --git a/src/simd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/_gen/simdgen/ops/Converts/categories.yaml index cc6c419dcc4..b4c7d468e99 100644 --- a/src/simd/_gen/simdgen/ops/Converts/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/categories.yaml @@ -2,9 +2,24 @@ - go: ConvertToInt32 commutative: false documentation: !string |- - // ConvertToInt32 converts element values to int32. + // NAME converts element values to int32. - go: ConvertToUint32 commutative: false documentation: !string |- - // ConvertToUint32Masked converts element values to uint32. + // NAME converts element values to uint32. + +- go: ConvertToUint16 + commutative: false + documentation: !string |- + // NAME converts element values to uint16. + +- go: ConvertToUint16x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to uint16. + +- go: ConvertToUint32x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to uint32. diff --git a/src/simd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/_gen/simdgen/ops/Converts/go.yaml index 4e251728bf9..be0f157b40c 100644 --- a/src/simd/_gen/simdgen/ops/Converts/go.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/go.yaml @@ -19,3 +19,75 @@ go: $u base: uint elemBits: 32 + +- go: ConvertToUint16x8 + asm: "VPMOVZXBW" + in: + - &u8x16 + base: uint + elemBits: 8 + bits: 128 + out: + - + base: uint + elemBits: 16 + bits: 128 + +- go: ConvertToUint16 + asm: "VPMOVZXBW" + in: + - *u8x16 + out: + - + base: uint + elemBits: 16 + bits: 256 + +- go: ConvertToUint16 + asm: "VPMOVZXBW" + in: + - + base: uint + elemBits: 8 + bits: 256 + out: + - + base: uint + elemBits: 16 + bits: 512 + +- go: ConvertToUint32x4 + asm: "VPMOVZXWD" + in: + - &u16x8 + base: uint + elemBits: 16 + bits: 128 + out: + - + base: uint + elemBits: 32 + bits: 128 + +- go: ConvertToUint32 + asm: "VPMOVZXWD" + in: + - *u16x8 + out: + - + base: uint + elemBits: 32 + bits: 256 + +- go: ConvertToUint32 + asm: "VPMOVZXWD" + in: + - + base: uint + elemBits: 16 + bits: 256 + out: + - + base: uint + elemBits: 32 + bits: 512 diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 76bbf738cb1..79f5dc8523b 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -1212,23 +1212,59 @@ func (x Float32x8) ConvertToInt32() Int32x8 // Asm: VCVTTPS2DQ, CPU Feature: AVX512 func (x Float32x16) ConvertToInt32() Int32x16 +/* ConvertToUint16 */ + +// ConvertToUint16 converts element values to uint16. +// +// Asm: VPMOVZXBW, CPU Feature: AVX2 +func (x Uint8x16) ConvertToUint16() Uint16x16 + +// ConvertToUint16 converts element values to uint16. +// +// Asm: VPMOVZXBW, CPU Feature: AVX512 +func (x Uint8x32) ConvertToUint16() Uint16x32 + +/* ConvertToUint16x8 */ + +// ConvertToUint16x8 converts 8 lowest vector element values to uint16. +// +// Asm: VPMOVZXBW, CPU Feature: AVX +func (x Uint8x16) ConvertToUint16x8() Uint16x8 + /* ConvertToUint32 */ -// ConvertToUint32Masked converts element values to uint32. +// ConvertToUint32 converts element values to uint32. // // Asm: VCVTPS2UDQ, CPU Feature: AVX512 func (x Float32x4) ConvertToUint32() Uint32x4 -// ConvertToUint32Masked converts element values to uint32. +// ConvertToUint32 converts element values to uint32. // // Asm: VCVTPS2UDQ, CPU Feature: AVX512 func (x Float32x8) ConvertToUint32() Uint32x8 -// ConvertToUint32Masked converts element values to uint32. +// ConvertToUint32 converts element values to uint32. // // Asm: VCVTPS2UDQ, CPU Feature: AVX512 func (x Float32x16) ConvertToUint32() Uint32x16 +// ConvertToUint32 converts element values to uint32. +// +// Asm: VPMOVZXWD, CPU Feature: AVX2 +func (x Uint16x8) ConvertToUint32() Uint32x8 + +// ConvertToUint32 converts element values to uint32. +// +// Asm: VPMOVZXWD, CPU Feature: AVX512 +func (x Uint16x16) ConvertToUint32() Uint32x16 + +/* ConvertToUint32x4 */ + +// ConvertToUint32x4 converts 4 lowest vector element values to uint32. +// +// Asm: VPMOVZXWD, CPU Feature: AVX +func (x Uint16x8) ConvertToUint32x4() Uint32x4 + /* CopySign */ // CopySign returns the product of the first operand with -1, 0, or 1,