diff --git a/src/cmd/compile/internal/gc/asm_test.go b/src/cmd/compile/internal/gc/asm_test.go
index c45615ae3a0..c2fc9862f3b 100644
--- a/src/cmd/compile/internal/gc/asm_test.go
+++ b/src/cmd/compile/internal/gc/asm_test.go
@@ -3245,6 +3245,24 @@ var linuxARM64Tests = []*asmTest{
 		pos: []string{"STP"},
 		neg: []string{"MOVB", "MOVH"},
 	},
+	{
+		fn: `
+		func $(a *[39]byte) {
+			*a = [39]byte{}
+		}
+		`,
+		pos: []string{"MOVD"},
+		neg: []string{"MOVB", "MOVH", "MOVW"},
+	},
+	{
+		fn: `
+		func $(a *[30]byte) {
+			*a = [30]byte{}
+		}
+		`,
+		pos: []string{"STP"},
+		neg: []string{"MOVB", "MOVH", "MOVW"},
+	},
 }
 
 var linuxMIPSTests = []*asmTest{
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 9f6ef57d434..c6057f24613 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -399,10 +399,14 @@
 				(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
 
 // strip off fractional word zeroing
-(Zero [s] ptr mem) && s%16 != 0 && s > 16 ->
-	(Zero [s-s%16]
-		(OffPtr ptr [s%16])
-		(Zero [s%16] ptr mem))
+(Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 ->
+	(Zero [8]
+		(OffPtr ptr [s-8])
+		(Zero [s-s%16] ptr mem))
+(Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 ->
+	(Zero [16]
+		(OffPtr ptr [s-16])
+		(Zero [s-s%16] ptr mem))
 
 // medium zeroing uses a duff device
 // 4, 16, and 64 are magic constants, see runtime/mkduff.go
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 1bb21d8a2c3..53331eda312 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -18551,24 +18551,48 @@ func rewriteValueARM64_OpZero_20(v *Value) bool {
 	config := b.Func.Config
 	_ = config
 	// match: (Zero [s] ptr mem)
-	// cond: s%16 != 0 && s > 16
-	// result: (Zero [s-s%16] (OffPtr ptr [s%16]) (Zero [s%16] ptr mem))
+	// cond: s%16 != 0 && s%16 <= 8 && s > 16
+	// result: (Zero [8] (OffPtr ptr [s-8]) (Zero [s-s%16] ptr mem))
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		if !(s%16 != 0 && s > 16) {
+		if !(s%16 != 0 && s%16 <= 8 && s > 16) {
 			break
 		}
 		v.reset(OpZero)
-		v.AuxInt = s - s%16
+		v.AuxInt = 8
 		v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
-		v0.AuxInt = s % 16
+		v0.AuxInt = s - 8
 		v0.AddArg(ptr)
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
-		v1.AuxInt = s % 16
+		v1.AuxInt = s - s%16
+		v1.AddArg(ptr)
+		v1.AddArg(mem)
+		v.AddArg(v1)
+		return true
+	}
+	// match: (Zero [s] ptr mem)
+	// cond: s%16 != 0 && s%16 > 8 && s > 16
+	// result: (Zero [16] (OffPtr ptr [s-16]) (Zero [s-s%16] ptr mem))
+	for {
+		s := v.AuxInt
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		if !(s%16 != 0 && s%16 > 8 && s > 16) {
+			break
+		}
+		v.reset(OpZero)
+		v.AuxInt = 16
+		v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
+		v0.AuxInt = s - 16
+		v0.AddArg(ptr)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
+		v1.AuxInt = s - s%16
 		v1.AddArg(ptr)
 		v1.AddArg(mem)
 		v.AddArg(v1)