[dev.simd] cmd/compile: don't require single use for SIMD load/store folding

For loads and stores of scalar values, we fold the address into the
load/store instruction without requiring the address to have only one
use. Do the same for SIMD, and remove the single-use check.

Change-Id: Ie7d1bbae1b32bb8c069548197632edae36b419b9
Reviewed-on: https://go-review.googlesource.com/c/go/+/707137
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Cherry Mui 2025-09-26 14:38:22 -04:00
parent b4d1e018a8
commit f0e281e693
2 changed files with 28 additions and 28 deletions

View file

@ -1798,10 +1798,10 @@
(VMOVSDf2v x:(MOVSDconst [c] )) => (VMOVSDconst [c] ) (VMOVSDf2v x:(MOVSDconst [c] )) => (VMOVSDconst [c] )
(VMOVSSf2v x:(MOVSSconst [c] )) => (VMOVSSconst [c] ) (VMOVSSf2v x:(MOVSSconst [c] )) => (VMOVSSconst [c] )
(VMOVDQUload(128|256|512) [off1] {sym} x:(ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 => (VMOVDQUload(128|256|512) [off1+off2] {sym} ptr mem) (VMOVDQUload(128|256|512) [off1] {sym} x:(ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (VMOVDQUload(128|256|512) [off1+off2] {sym} ptr mem)
(VMOVDQUstore(128|256|512) [off1] {sym} x:(ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 => (VMOVDQUstore(128|256|512) [off1+off2] {sym} ptr val mem) (VMOVDQUstore(128|256|512) [off1] {sym} x:(ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) => (VMOVDQUstore(128|256|512) [off1+off2] {sym} ptr val mem)
(VMOVDQUload(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) => (VMOVDQUload(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base mem) (VMOVDQUload(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (VMOVDQUload(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base mem)
(VMOVDQUstore(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) => (VMOVDQUstore(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base val mem) (VMOVDQUstore(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (VMOVDQUstore(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base val mem)
// 2-op VPTEST optimizations // 2-op VPTEST optimizations
(SETEQ (VPTEST x:(VPAND(128|256) j k) y)) && x == y && x.Uses == 2 => (SETEQ (VPTEST j k)) (SETEQ (VPTEST x:(VPAND(128|256) j k) y)) && x == y && x.Uses == 2 => (SETEQ (VPTEST j k))

View file

@ -33295,7 +33295,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload128(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (VMOVDQUload128 [off1] {sym} x:(ADDQconst [off2] ptr) mem) // match: (VMOVDQUload128 [off1] {sym} x:(ADDQconst [off2] ptr) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 // cond: is32Bit(int64(off1)+int64(off2))
// result: (VMOVDQUload128 [off1+off2] {sym} ptr mem) // result: (VMOVDQUload128 [off1+off2] {sym} ptr mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33307,7 +33307,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload128(v *Value) bool {
off2 := auxIntToInt32(x.AuxInt) off2 := auxIntToInt32(x.AuxInt)
ptr := x.Args[0] ptr := x.Args[0]
mem := v_1 mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) { if !(is32Bit(int64(off1) + int64(off2))) {
break break
} }
v.reset(OpAMD64VMOVDQUload128) v.reset(OpAMD64VMOVDQUload128)
@ -33317,7 +33317,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload128(v *Value) bool {
return true return true
} }
// match: (VMOVDQUload128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) // match: (VMOVDQUload128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
// result: (VMOVDQUload128 [off1+off2] {mergeSym(sym1, sym2)} base mem) // result: (VMOVDQUload128 [off1+off2] {mergeSym(sym1, sym2)} base mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33330,7 +33330,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload128(v *Value) bool {
sym2 := auxToSym(x.Aux) sym2 := auxToSym(x.Aux)
base := x.Args[0] base := x.Args[0]
mem := v_1 mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
break break
} }
v.reset(OpAMD64VMOVDQUload128) v.reset(OpAMD64VMOVDQUload128)
@ -33345,7 +33345,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload256(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (VMOVDQUload256 [off1] {sym} x:(ADDQconst [off2] ptr) mem) // match: (VMOVDQUload256 [off1] {sym} x:(ADDQconst [off2] ptr) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 // cond: is32Bit(int64(off1)+int64(off2))
// result: (VMOVDQUload256 [off1+off2] {sym} ptr mem) // result: (VMOVDQUload256 [off1+off2] {sym} ptr mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33357,7 +33357,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload256(v *Value) bool {
off2 := auxIntToInt32(x.AuxInt) off2 := auxIntToInt32(x.AuxInt)
ptr := x.Args[0] ptr := x.Args[0]
mem := v_1 mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) { if !(is32Bit(int64(off1) + int64(off2))) {
break break
} }
v.reset(OpAMD64VMOVDQUload256) v.reset(OpAMD64VMOVDQUload256)
@ -33367,7 +33367,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload256(v *Value) bool {
return true return true
} }
// match: (VMOVDQUload256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) // match: (VMOVDQUload256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
// result: (VMOVDQUload256 [off1+off2] {mergeSym(sym1, sym2)} base mem) // result: (VMOVDQUload256 [off1+off2] {mergeSym(sym1, sym2)} base mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33380,7 +33380,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload256(v *Value) bool {
sym2 := auxToSym(x.Aux) sym2 := auxToSym(x.Aux)
base := x.Args[0] base := x.Args[0]
mem := v_1 mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
break break
} }
v.reset(OpAMD64VMOVDQUload256) v.reset(OpAMD64VMOVDQUload256)
@ -33395,7 +33395,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload512(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (VMOVDQUload512 [off1] {sym} x:(ADDQconst [off2] ptr) mem) // match: (VMOVDQUload512 [off1] {sym} x:(ADDQconst [off2] ptr) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 // cond: is32Bit(int64(off1)+int64(off2))
// result: (VMOVDQUload512 [off1+off2] {sym} ptr mem) // result: (VMOVDQUload512 [off1+off2] {sym} ptr mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33407,7 +33407,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload512(v *Value) bool {
off2 := auxIntToInt32(x.AuxInt) off2 := auxIntToInt32(x.AuxInt)
ptr := x.Args[0] ptr := x.Args[0]
mem := v_1 mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) { if !(is32Bit(int64(off1) + int64(off2))) {
break break
} }
v.reset(OpAMD64VMOVDQUload512) v.reset(OpAMD64VMOVDQUload512)
@ -33417,7 +33417,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload512(v *Value) bool {
return true return true
} }
// match: (VMOVDQUload512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) // match: (VMOVDQUload512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
// result: (VMOVDQUload512 [off1+off2] {mergeSym(sym1, sym2)} base mem) // result: (VMOVDQUload512 [off1+off2] {mergeSym(sym1, sym2)} base mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33430,7 +33430,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUload512(v *Value) bool {
sym2 := auxToSym(x.Aux) sym2 := auxToSym(x.Aux)
base := x.Args[0] base := x.Args[0]
mem := v_1 mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
break break
} }
v.reset(OpAMD64VMOVDQUload512) v.reset(OpAMD64VMOVDQUload512)
@ -33446,7 +33446,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore128(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (VMOVDQUstore128 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) // match: (VMOVDQUstore128 [off1] {sym} x:(ADDQconst [off2] ptr) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 // cond: is32Bit(int64(off1)+int64(off2))
// result: (VMOVDQUstore128 [off1+off2] {sym} ptr val mem) // result: (VMOVDQUstore128 [off1+off2] {sym} ptr val mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33459,7 +33459,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore128(v *Value) bool {
ptr := x.Args[0] ptr := x.Args[0]
val := v_1 val := v_1
mem := v_2 mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) { if !(is32Bit(int64(off1) + int64(off2))) {
break break
} }
v.reset(OpAMD64VMOVDQUstore128) v.reset(OpAMD64VMOVDQUstore128)
@ -33469,7 +33469,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore128(v *Value) bool {
return true return true
} }
// match: (VMOVDQUstore128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) // match: (VMOVDQUstore128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
// result: (VMOVDQUstore128 [off1+off2] {mergeSym(sym1, sym2)} base val mem) // result: (VMOVDQUstore128 [off1+off2] {mergeSym(sym1, sym2)} base val mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33483,7 +33483,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore128(v *Value) bool {
base := x.Args[0] base := x.Args[0]
val := v_1 val := v_1
mem := v_2 mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
break break
} }
v.reset(OpAMD64VMOVDQUstore128) v.reset(OpAMD64VMOVDQUstore128)
@ -33499,7 +33499,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore256(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (VMOVDQUstore256 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) // match: (VMOVDQUstore256 [off1] {sym} x:(ADDQconst [off2] ptr) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 // cond: is32Bit(int64(off1)+int64(off2))
// result: (VMOVDQUstore256 [off1+off2] {sym} ptr val mem) // result: (VMOVDQUstore256 [off1+off2] {sym} ptr val mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33512,7 +33512,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore256(v *Value) bool {
ptr := x.Args[0] ptr := x.Args[0]
val := v_1 val := v_1
mem := v_2 mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) { if !(is32Bit(int64(off1) + int64(off2))) {
break break
} }
v.reset(OpAMD64VMOVDQUstore256) v.reset(OpAMD64VMOVDQUstore256)
@ -33522,7 +33522,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore256(v *Value) bool {
return true return true
} }
// match: (VMOVDQUstore256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) // match: (VMOVDQUstore256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
// result: (VMOVDQUstore256 [off1+off2] {mergeSym(sym1, sym2)} base val mem) // result: (VMOVDQUstore256 [off1+off2] {mergeSym(sym1, sym2)} base val mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33536,7 +33536,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore256(v *Value) bool {
base := x.Args[0] base := x.Args[0]
val := v_1 val := v_1
mem := v_2 mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
break break
} }
v.reset(OpAMD64VMOVDQUstore256) v.reset(OpAMD64VMOVDQUstore256)
@ -33552,7 +33552,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore512(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (VMOVDQUstore512 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) // match: (VMOVDQUstore512 [off1] {sym} x:(ADDQconst [off2] ptr) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 // cond: is32Bit(int64(off1)+int64(off2))
// result: (VMOVDQUstore512 [off1+off2] {sym} ptr val mem) // result: (VMOVDQUstore512 [off1+off2] {sym} ptr val mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33565,7 +33565,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore512(v *Value) bool {
ptr := x.Args[0] ptr := x.Args[0]
val := v_1 val := v_1
mem := v_2 mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) { if !(is32Bit(int64(off1) + int64(off2))) {
break break
} }
v.reset(OpAMD64VMOVDQUstore512) v.reset(OpAMD64VMOVDQUstore512)
@ -33575,7 +33575,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore512(v *Value) bool {
return true return true
} }
// match: (VMOVDQUstore512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) // match: (VMOVDQUstore512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
// result: (VMOVDQUstore512 [off1+off2] {mergeSym(sym1, sym2)} base val mem) // result: (VMOVDQUstore512 [off1+off2] {mergeSym(sym1, sym2)} base val mem)
for { for {
off1 := auxIntToInt32(v.AuxInt) off1 := auxIntToInt32(v.AuxInt)
@ -33589,7 +33589,7 @@ func rewriteValueAMD64_OpAMD64VMOVDQUstore512(v *Value) bool {
base := x.Args[0] base := x.Args[0]
val := v_1 val := v_1
mem := v_2 mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
break break
} }
v.reset(OpAMD64VMOVDQUstore512) v.reset(OpAMD64VMOVDQUstore512)