mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: improve LoweredZero performance for ppc64x
This change improves the performance of the LoweredZero rule on ppc64x.

The improvement can be seen in the runtime ClearFat benchmarks:

BenchmarkClearFat12-16      2.40    0.69    -71.25%
BenchmarkClearFat16-16      9.98    0.93    -90.68%
BenchmarkClearFat24-16      4.75    0.93    -80.42%
BenchmarkClearFat32-16      6.02    0.93    -84.55%
BenchmarkClearFat40-16      7.19    1.16    -83.87%
BenchmarkClearFat48-16      15.0    1.39    -90.73%
BenchmarkClearFat56-16      9.95    1.62    -83.72%
BenchmarkClearFat64-16      18.0    1.86    -89.67%
BenchmarkClearFat128-16     30.0    8.08    -73.07%
BenchmarkClearFat256-16     52.5    11.3    -78.48%
BenchmarkClearFat512-16     97.0    19.0    -80.41%
BenchmarkClearFat1024-16    244     34.2    -85.98%

Fixes: #19532

Change-Id: If493e28bc1d8e61bc79978498be9f5336a36cd3f
Reviewed-on: https://go-review.googlesource.com/38096
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
This commit is contained in:
parent
d972dc2de9
commit
23bd919136
5 changed files with 426 additions and 305 deletions
|
|
@ -312,19 +312,37 @@ func init() {
|
|||
|
||||
// large or unaligned zeroing
|
||||
// arg0 = address of memory to zero (in R3, changed as side effect)
|
||||
// arg1 = address of the last element to zero
|
||||
// arg2 = mem
|
||||
// returns mem
|
||||
// ADD -8,R3,R3 // intermediate value not valid GC ptr, cannot expose to opt+GC
|
||||
// MOVDU R0, 8(R3)
|
||||
// CMP R3, Rarg1
|
||||
// BLE -2(PC)
|
||||
//
|
||||
// a loop is generated when more than one iteration is needed;
|
||||
// each iteration clears 4 doublewords (32 bytes)
|
||||
//
|
||||
// MOVD $len/32,R31
|
||||
// MOVD R31,CTR
|
||||
// loop:
|
||||
// MOVD R0,(R3)
|
||||
// MOVD R0,8(R3)
|
||||
// MOVD R0,16(R3)
|
||||
// MOVD R0,24(R3)
|
||||
// ADD $32,R3
|
||||
// BC loop
|
||||
|
||||
// remaining doubleword clears generated as needed
|
||||
// MOVD R0,(R3)
|
||||
// MOVD R0,8(R3)
|
||||
// MOVD R0,16(R3)
|
||||
// MOVD R0,24(R3)
|
||||
|
||||
// one or more of these to clear remainder < 8 bytes
|
||||
// MOVW R0,n1(R3)
|
||||
// MOVH R0,n2(R3)
|
||||
// MOVB R0,n3(R3)
|
||||
{
|
||||
name: "LoweredZero",
|
||||
aux: "Int64",
|
||||
argLength: 3,
|
||||
argLength: 2,
|
||||
reg: regInfo{
|
||||
inputs: []regMask{buildReg("R3"), gp},
|
||||
inputs: []regMask{buildReg("R3")},
|
||||
clobbers: buildReg("R3"),
|
||||
},
|
||||
clobberFlags: true,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue