[dev.simd] simd: add emulations for missing AVX2 comparisons

this also removes AVX512 versions of the operations
that would use the same names, but not run on AVX2-only

includes files generated by simdgen CL 692355

Change-Id: Iff29042245b7688133fed49a03e681e85235b8a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/692335
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
David Chase 2025-08-01 09:23:45 -04:00
parent ddb689c7bb
commit 2080415aa2
11 changed files with 855 additions and 2827 deletions

View file

@ -87,6 +87,16 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality
floats: []int{32},
}
var avx2SignedComparisons = &shapes{
vecs: []int{128, 256},
ints: []int{8, 16, 32, 64},
}
var avx2UnsignedComparisons = &shapes{
vecs: []int{128, 256},
uints: []int{8, 16, 32, 64},
}
type templateData struct {
Vec string // the type of the vector, e.g. Float32x4
AOrAn string // for documentation, the article "a" or "an"
@ -486,6 +496,130 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
}
`)
func (t templateData) CPUfeature() string {
switch t.Vwidth {
case 128:
return "AVX"
case 256:
return "AVX2"
case 512:
return "AVX512"
}
panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
}
var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
// Less returns a mask whose elements indicate whether x < y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
return y.Greater(x)
}
// GreaterEqual returns a mask whose elements indicate whether x >= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}()
return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// LessEqual returns a mask whose elements indicate whether x <= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}()
return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// NotEqual returns a mask whose elements indicate whether x != y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}()
return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
`)
// CPUfeatureAVX2if8 return AVX2 if the element width is 8,
// otherwise, it returns CPUfeature. This is for the cpufeature
// of unsigned comparison emulation, which uses shifts for all
// the sizes > 8 (shifts are AVX) but must use broadcast (AVX2)
// for bytes.
func (t templateData) CPUfeatureAVX2if8() string {
if t.Width == 8 {
return "AVX2"
}
return t.CPUfeature()
}
var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", `
// Greater returns a mask whose elements indicate whether x > y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
ones := x.Equal(x).AsInt{{.WxC}}()
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return a.Xor(signs).Greater(b.Xor(signs))
}
// Less returns a mask whose elements indicate whether x < y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
ones := x.Equal(x).AsInt{{.WxC}}()
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return b.Xor(signs).Greater(a.Xor(signs))
}
// GreaterEqual returns a mask whose elements indicate whether x >= y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// LessEqual returns a mask whose elements indicate whether x <= y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// NotEqual returns a mask whose elements indicate whether x != y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}()
return a.Equal(b).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
`)
var unsafePATemplate = templateOf("unsafe PA helper", `
// pa{{.Vec}} returns a type-unsafe pointer to array that can
// only be used with partial load/store operations that only
@ -591,6 +725,8 @@ func main() {
avx2SmallLoadSlicePartTemplate,
avx2MaskedTemplate,
avx512MaskedTemplate,
avx2SignedComparisonsTemplate,
avx2UnsignedComparisonsTemplate,
broadcastTemplate,
)
}