cmd/compile: improve LoweredZero performance for ppc64x
This change improves the performance of the LoweredZero rule on ppc64x.

The improvement can be seen in the runtime ClearFat benchmarks (old ns/op, new ns/op, delta):

BenchmarkClearFat12-16      2.40    0.69    -71.25%
BenchmarkClearFat16-16      9.98    0.93    -90.68%
BenchmarkClearFat24-16      4.75    0.93    -80.42%
BenchmarkClearFat32-16      6.02    0.93    -84.55%
BenchmarkClearFat40-16      7.19    1.16    -83.87%
BenchmarkClearFat48-16      15.0    1.39    -90.73%
BenchmarkClearFat56-16      9.95    1.62    -83.72%
BenchmarkClearFat64-16      18.0    1.86    -89.67%
BenchmarkClearFat128-16     30.0    8.08    -73.07%
BenchmarkClearFat256-16     52.5    11.3    -78.48%
BenchmarkClearFat512-16     97.0    19.0    -80.41%
BenchmarkClearFat1024-16    244     34.2    -85.98%

Fixes: #19532

Change-Id: If493e28bc1d8e61bc79978498be9f5336a36cd3f
Reviewed-on: https://go-review.googlesource.com/38096
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
parent d972dc2de9
commit 23bd919136

5 changed files with 426 additions and 305 deletions
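The ClearFat figures above are benchcmp-style comparisons of the runtime's ClearFat benchmarks (old ns/op, new ns/op, delta). A minimal sketch of that kind of benchmark is shown below; the type name and size are illustrative, not the runtime's actual benchmark code.

package clearfat

import "testing"

// fat40 stands in for one of the fixed-size values the ClearFat
// benchmarks zero; assigning the zero value below is what the
// compiler lowers to a Zero / LoweredZero operation.
type fat40 struct{ a [40]byte }

var sink fat40

func BenchmarkClearFat40(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = fat40{} // measured: zeroing a 40-byte value
	}
}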
@@ -831,62 +831,135 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssaGenISEL(v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())

case ssa.OpPPC64LoweredZero:
// Similar to how this is done on ARM,
// except that PPC MOVDU x,off(y) is *(y+off) = x; y=y+off
// not store-and-increment.
// Therefore R3 should be dest-align
// and arg1 should be dest+size-align
// HOWEVER, the input dest address cannot be dest-align because
// that does not necessarily address valid memory and it's not
// known how that might be optimized. Therefore, correct it in
// in the expansion:

// unaligned data doesn't hurt performance
// for these instructions on power8 or later

// for sizes >= 64 generate a loop as follows:

// set up loop counter in CTR, used by BC
// MOVD len/32,REG_TMP
// MOVD REG_TMP,CTR
// loop:
// MOVD R0,(R3)
// MOVD R0,8(R3)
// MOVD R0,16(R3)
// MOVD R0,24(R3)
// ADD $32,R3
// BC 16, 0, loop
//
// ADD -8,R3,R3
// MOVDU R0, 8(R3)
// CMP R3, Rarg1
// BL -2(PC)
// arg1 is the address of the last element to zero
// auxint is alignment
var sz int64
var movu obj.As
switch {
case v.AuxInt%8 == 0:
sz = 8
movu = ppc64.AMOVDU
case v.AuxInt%4 == 0:
sz = 4
movu = ppc64.AMOVWZU // MOVWU instruction not implemented
case v.AuxInt%2 == 0:
sz = 2
movu = ppc64.AMOVHU
default:
sz = 1
movu = ppc64.AMOVBU
}
// any remainder is done as described below

p := gc.Prog(ppc64.AADD)
p.Reg = v.Args[0].Reg()
// for sizes < 64 bytes, first clear as many doublewords as possible,
// then handle the remainder
// MOVD R0,(R3)
// MOVD R0,8(R3)
// .... etc.
//
// the remainder bytes are cleared using one or more
// of the following instructions with the appropriate
// offsets depending which instructions are needed
//
// MOVW R0,n1(R3) 4 bytes
// MOVH R0,n2(R3) 2 bytes
// MOVB R0,n3(R3) 1 byte
//
// 7 bytes: MOVW, MOVH, MOVB
// 6 bytes: MOVW, MOVH
// 5 bytes: MOVW, MOVB
// 3 bytes: MOVH, MOVB

// each loop iteration does 32 bytes
ctr := v.AuxInt / 32

// remainder bytes
rem := v.AuxInt % 32

// only generate a loop if there is more
// than 1 iteration.
if ctr > 1 {
// Set up CTR loop counter
p := gc.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = -sz
p.From.Offset = ctr
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Args[0].Reg()
p.To.Reg = ppc64.REGTMP

p = gc.Prog(movu)
p = gc.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REGTMP
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_CTR

// generate 4 MOVDs
// when this is a loop then the top must be saved
var top *obj.Prog
for offset := int64(0); offset < 32; offset += 8 {
// This is the top of loop
p := gc.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_R0
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = sz
p.To.Offset = offset
// Save the top of loop
if top == nil {
top = p
}
}

p2 := gc.Prog(ppc64.ACMPU)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = v.Args[0].Reg()
p2.To.Reg = v.Args[1].Reg()
p2.To.Type = obj.TYPE_REG
// Increment address for the
// 4 doublewords just zeroed.
p = gc.Prog(ppc64.AADD)
p.Reg = v.Args[0].Reg()
p.From.Type = obj.TYPE_CONST
p.From.Offset = 32
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Args[0].Reg()

p3 := gc.Prog(ppc64.ABLT)
p3.To.Type = obj.TYPE_BRANCH
gc.Patch(p3, p)
// Branch back to top of loop
// based on CTR
// BC with BO_BCTR generates bdnz
p = gc.Prog(ppc64.ABC)
p.From.Type = obj.TYPE_CONST
p.From.Offset = ppc64.BO_BCTR
p.Reg = ppc64.REG_R0
p.To.Type = obj.TYPE_BRANCH
gc.Patch(p, top)
}

// when ctr == 1 the loop was not generated but
// there are at least 32 bytes to clear, so add
// that to the remainder to generate the code
// to clear those doublewords
if ctr == 1 {
rem += 32
}

// clear the remainder starting at offset zero
offset := int64(0)

// first clear as many doublewords as possible
// then clear remaining sizes as available
for rem > 0 {
op, size := ppc64.AMOVB, int64(1)
switch {
case rem >= 8:
op, size = ppc64.AMOVD, 8
case rem >= 4:
op, size = ppc64.AMOVW, 4
case rem >= 2:
op, size = ppc64.AMOVH, 2
}
p := gc.Prog(op)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_R0
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = offset
rem -= size
offset += size
}

case ssa.OpPPC64LoweredMove:
// Similar to how this is done on ARM,
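The new code generator above decomposes the size to zero into 32-byte loop iterations plus a remainder cleared with the widest stores that still fit. A standalone sketch of that decomposition in plain Go (a hypothetical helper, not compiler code) makes the instruction selection easy to trace:

package main

import "fmt"

// zeroPlan mirrors the decomposition used by LoweredZero: size/32 iterations
// of a 4xMOVD loop, then the remainder cleared with the widest store that
// still fits (MOVD, MOVW, MOVH, MOVB).
func zeroPlan(size int64) []string {
	var plan []string
	ctr := size / 32
	rem := size % 32
	if ctr > 1 {
		plan = append(plan, fmt.Sprintf("loop %d times: 4 x MOVD (32 bytes each)", ctr))
	} else if ctr == 1 {
		rem += 32 // a single iteration is emitted inline instead of as a loop
	}
	offset := int64(0)
	for rem > 0 {
		op, sz := "MOVB", int64(1)
		switch {
		case rem >= 8:
			op, sz = "MOVD", 8
		case rem >= 4:
			op, sz = "MOVW", 4
		case rem >= 2:
			op, sz = "MOVH", 2
		}
		plan = append(plan, fmt.Sprintf("%s R0,%d(R3)", op, offset))
		rem -= sz
		offset += sz
	}
	return plan
}

func main() {
	for _, s := range []int64{7, 24, 40, 129} {
		fmt.Println(s, zeroPlan(s))
	}
}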
@@ -485,60 +485,73 @@
(Store {t} ptr val mem) && t.(Type).Size() == 2 -> (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.(Type).Size() == 1 -> (MOVBstore ptr val mem)

// Using Zero instead of LoweredZero allows the
// target address to be folded where possible.
(Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstorezero destptr mem)
(Zero [2] {t} destptr mem) && t.(Type).Alignment()%2 == 0 ->
(MOVHstorezero destptr mem)
(Zero [2] destptr mem) ->
(MOVBstorezero [1] destptr
(MOVBstorezero [0] destptr mem))
(Zero [4] {t} destptr mem) && t.(Type).Alignment()%4 == 0 ->
(MOVWstorezero destptr mem)
(Zero [4] {t} destptr mem) && t.(Type).Alignment()%2 == 0 ->
(MOVHstorezero [2] destptr
(MOVHstorezero [0] destptr mem))
(Zero [4] destptr mem) ->
(MOVBstorezero [3] destptr
(MOVBstorezero [2] destptr
(MOVBstorezero [1] destptr
(MOVBstorezero [0] destptr mem))))
(Zero [8] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
(MOVDstorezero [0] destptr mem)
(Zero [8] {t} destptr mem) && t.(Type).Alignment()%4 == 0 ->
(MOVWstorezero [4] destptr
(MOVWstorezero [0] destptr mem))
(Zero [8] {t} destptr mem) && t.(Type).Alignment()%2 == 0 ->
(MOVHstorezero [6] destptr
(MOVHstorezero [4] destptr
(MOVHstorezero [2] destptr
(MOVHstorezero [0] destptr mem))))

(MOVHstorezero destptr mem)
(Zero [3] destptr mem) ->
(MOVBstorezero [2] destptr
(MOVBstorezero [1] destptr
(MOVBstorezero [0] destptr mem)))
(MOVHstorezero destptr mem))
(Zero [4] destptr mem) ->
(MOVWstorezero destptr mem)
(Zero [5] destptr mem) ->
(MOVBstorezero [4] destptr
(MOVWstorezero destptr mem))
(Zero [6] destptr mem) ->
(MOVHstorezero [4] destptr
(MOVWstorezero destptr mem))
(Zero [7] destptr mem) ->
(MOVBstorezero [6] destptr
(MOVHstorezero [4] destptr
(MOVWstorezero destptr mem)))
(Zero [8] destptr mem) ->
(MOVDstorezero destptr mem)

// Zero small numbers of words directly.
(Zero [16] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
(Zero [12] destptr mem) ->
(MOVWstorezero [8] destptr
(MOVDstorezero [0] destptr mem))
(Zero [16] destptr mem) ->
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem))
(Zero [24] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
(Zero [24] destptr mem) ->
(MOVDstorezero [16] destptr
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem)))
(Zero [32] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
(Zero [32] destptr mem) ->
(MOVDstorezero [24] destptr
(MOVDstorezero [16] destptr
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem))))

// Large zeroing uses a loop
(Zero [s] {t} ptr mem)
&& (s > 512 || config.noDuffDevice) || t.(Type).Alignment()%8 != 0 ->
(LoweredZero [t.(Type).Alignment()]
ptr
(ADDconst <ptr.Type> ptr [s-moveSize(t.(Type).Alignment(), config)])
mem)
(Zero [40] destptr mem) ->
(MOVDstorezero [32] destptr
(MOVDstorezero [24] destptr
(MOVDstorezero [16] destptr
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem)))))

(Zero [48] destptr mem) ->
(MOVDstorezero [40] destptr
(MOVDstorezero [32] destptr
(MOVDstorezero [24] destptr
(MOVDstorezero [16] destptr
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem))))))

(Zero [56] destptr mem) ->
(MOVDstorezero [48] destptr
(MOVDstorezero [40] destptr
(MOVDstorezero [32] destptr
(MOVDstorezero [24] destptr
(MOVDstorezero [16] destptr
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem)))))))

// Handle cases not handled above
(Zero [s] ptr mem) -> (LoweredZero [s] ptr mem)

// moves
(Move [0] _ _ mem) -> mem
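The explicit sizes in the rewritten rules above (0-8, 12, 16, 24, 32, 40, 48 and 56 bytes) are expanded inline into store-zero instructions; every other size falls through to the final rule and becomes a LoweredZero. A small illustrative check of that split (not compiler code):

package main

import "fmt"

// inlineZero reports whether the rules above expand a Zero of the given size
// entirely into MOV*storezero instructions; anything else is matched by the
// catch-all rule and lowered to LoweredZero.
func inlineZero(size int64) bool {
	switch size {
	case 0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 16, 24, 32, 40, 48, 56:
		return true
	}
	return false
}

func main() {
	for _, s := range []int64{7, 12, 20, 64, 1024} {
		fmt.Println(s, inlineZero(s))
	}
}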
@@ -312,19 +312,37 @@ func init() {

// large or unaligned zeroing
// arg0 = address of memory to zero (in R3, changed as side effect)
// arg1 = address of the last element to zero
// arg2 = mem
// returns mem
// ADD -8,R3,R3 // intermediate value not valid GC ptr, cannot expose to opt+GC
// MOVDU R0, 8(R3)
// CMP R3, Rarg1
// BLE -2(PC)
//
// a loop is generated when there is more than one iteration
// needed to clear 4 doublewords
//
// MOVD $len/32,R31
// MOVD R31,CTR
// loop:
// MOVD R0,(R3)
// MOVD R0,8(R3)
// MOVD R0,16(R3)
// MOVD R0,24(R3)
// ADD R3,32
// BC loop

// remaining doubleword clears generated as needed
// MOVD R0,(R3)
// MOVD R0,8(R3)
// MOVD R0,16(R3)
// MOVD R0,24(R3)

// one or more of these to clear remainder < 8 bytes
// MOVW R0,n1(R3)
// MOVH R0,n2(R3)
// MOVB R0,n3(R3)
{
name: "LoweredZero",
aux: "Int64",
argLength: 3,
argLength: 2,
reg: regInfo{
inputs: []regMask{buildReg("R3"), gp},
inputs: []regMask{buildReg("R3")},
clobbers: buildReg("R3"),
},
clobberFlags: true,
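The essence of the op-definition change above is the contract change: the old LoweredZero took the destination pointer plus the address of the last element (hence argLength: 3 and the extra gp input), with the alignment in AuxInt, while the new one takes only the pointer and carries the byte count in AuxInt. A small Go illustration of the two shapes (struct and field names here are ours, purely for illustration; the compiler represents this with *ssa.Value arguments and AuxInt):

package main

import "fmt"

// oldLoweredZero / newLoweredZero are illustrative stand-ins, not compiler
// types; they just spell out what each op variant carried.
type oldLoweredZero struct {
	ptr, lastAddr string // two register inputs: dest and address of last element
	align         int64  // AuxInt held the alignment
}

type newLoweredZero struct {
	ptr  string // single register input, R3, clobbered as the code advances it
	size int64  // AuxInt now holds the number of bytes to zero
}

func main() {
	fmt.Printf("%+v\n", oldLoweredZero{ptr: "R3", lastAddr: "R4", align: 8})
	fmt.Printf("%+v\n", newLoweredZero{ptr: "R3", size: 40})
}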
@@ -17368,13 +17368,12 @@ var opcodeTable = [...]opInfo{
{
name: "LoweredZero",
auxType: auxInt64,
argLen: 3,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
reg: regInfo{
inputs: []inputInfo{
{0, 8}, // R3
{1, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
clobbers: 8, // R3
},
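In the generated opcode table above, register sets are bit masks over the backend's register list, so the remaining input {0, 8} and clobbers: 8 both denote R3 (bit 3), as the generated comments confirm. A quick way to decode such a mask (an illustrative snippet, not compiler code):

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	const mask = 8 // from the table above: {0, 8} and clobbers: 8
	// Each set bit selects one register in the backend's register list;
	// here bit 3 corresponds to R3, matching the "// R3" comments.
	fmt.Printf("lowest set bit: %d\n", bits.TrailingZeros64(mask)) // 3
}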
@@ -9656,8 +9656,6 @@ func rewriteValuePPC64_OpXor8(v *Value) bool {
func rewriteValuePPC64_OpZero(v *Value) bool {
b := v.Block
_ = b
config := b.Func.Config
_ = config
// match: (Zero [0] _ mem)
// cond:
// result: mem
@@ -9685,187 +9683,23 @@ func rewriteValuePPC64_OpZero(v *Value) bool {
v.AddArg(mem)
return true
}
// match: (Zero [2] {t} destptr mem)
// cond: t.(Type).Alignment()%2 == 0
// match: (Zero [2] destptr mem)
// cond:
// result: (MOVHstorezero destptr mem)
for {
if v.AuxInt != 2 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%2 == 0) {
break
}
v.reset(OpPPC64MOVHstorezero)
v.AddArg(destptr)
v.AddArg(mem)
return true
}
// match: (Zero [2] destptr mem)
// cond:
// result: (MOVBstorezero [1] destptr (MOVBstorezero [0] destptr mem))
for {
if v.AuxInt != 2 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVBstorezero)
v.AuxInt = 1
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
v0.AuxInt = 0
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [4] {t} destptr mem)
// cond: t.(Type).Alignment()%4 == 0
// result: (MOVWstorezero destptr mem)
for {
if v.AuxInt != 4 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%4 == 0) {
break
}
v.reset(OpPPC64MOVWstorezero)
v.AddArg(destptr)
v.AddArg(mem)
return true
}
// match: (Zero [4] {t} destptr mem)
// cond: t.(Type).Alignment()%2 == 0
// result: (MOVHstorezero [2] destptr (MOVHstorezero [0] destptr mem))
for {
if v.AuxInt != 4 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%2 == 0) {
break
}
v.reset(OpPPC64MOVHstorezero)
v.AuxInt = 2
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
v0.AuxInt = 0
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [4] destptr mem)
// cond:
// result: (MOVBstorezero [3] destptr (MOVBstorezero [2] destptr (MOVBstorezero [1] destptr (MOVBstorezero [0] destptr mem))))
for {
if v.AuxInt != 4 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVBstorezero)
v.AuxInt = 3
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
v0.AuxInt = 2
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
v1.AuxInt = 1
v1.AddArg(destptr)
v2 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
v2.AuxInt = 0
v2.AddArg(destptr)
v2.AddArg(mem)
v1.AddArg(v2)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [8] {t} destptr mem)
// cond: t.(Type).Alignment()%8 == 0
// result: (MOVDstorezero [0] destptr mem)
for {
if v.AuxInt != 8 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%8 == 0) {
break
}
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = 0
v.AddArg(destptr)
v.AddArg(mem)
return true
}
// match: (Zero [8] {t} destptr mem)
// cond: t.(Type).Alignment()%4 == 0
// result: (MOVWstorezero [4] destptr (MOVWstorezero [0] destptr mem))
for {
if v.AuxInt != 8 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%4 == 0) {
break
}
v.reset(OpPPC64MOVWstorezero)
v.AuxInt = 4
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
v0.AuxInt = 0
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [8] {t} destptr mem)
// cond: t.(Type).Alignment()%2 == 0
// result: (MOVHstorezero [6] destptr (MOVHstorezero [4] destptr (MOVHstorezero [2] destptr (MOVHstorezero [0] destptr mem))))
for {
if v.AuxInt != 8 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%2 == 0) {
break
}
v.reset(OpPPC64MOVHstorezero)
v.AuxInt = 6
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
v0.AuxInt = 4
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
v1.AuxInt = 2
v1.AddArg(destptr)
v2 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
v2.AuxInt = 0
v2.AddArg(destptr)
v2.AddArg(mem)
v1.AddArg(v2)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [3] destptr mem)
// cond:
// result: (MOVBstorezero [2] destptr (MOVBstorezero [1] destptr (MOVBstorezero [0] destptr mem)))
// result: (MOVBstorezero [2] destptr (MOVHstorezero destptr mem))
for {
if v.AuxInt != 3 {
break
@@ -9875,30 +9709,126 @@ func rewriteValuePPC64_OpZero(v *Value) bool {
v.reset(OpPPC64MOVBstorezero)
v.AuxInt = 2
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
v0.AuxInt = 1
v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
v1.AuxInt = 0
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [4] destptr mem)
// cond:
// result: (MOVWstorezero destptr mem)
for {
if v.AuxInt != 4 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVWstorezero)
v.AddArg(destptr)
v.AddArg(mem)
return true
}
// match: (Zero [5] destptr mem)
// cond:
// result: (MOVBstorezero [4] destptr (MOVWstorezero destptr mem))
for {
if v.AuxInt != 5 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVBstorezero)
v.AuxInt = 4
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [6] destptr mem)
// cond:
// result: (MOVHstorezero [4] destptr (MOVWstorezero destptr mem))
for {
if v.AuxInt != 6 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVHstorezero)
v.AuxInt = 4
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [7] destptr mem)
// cond:
// result: (MOVBstorezero [6] destptr (MOVHstorezero [4] destptr (MOVWstorezero destptr mem)))
for {
if v.AuxInt != 7 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVBstorezero)
v.AuxInt = 6
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
v0.AuxInt = 4
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
v1.AddArg(destptr)
v1.AddArg(mem)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [16] {t} destptr mem)
// cond: t.(Type).Alignment()%8 == 0
// match: (Zero [8] destptr mem)
// cond:
// result: (MOVDstorezero destptr mem)
for {
if v.AuxInt != 8 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVDstorezero)
v.AddArg(destptr)
v.AddArg(mem)
return true
}
// match: (Zero [12] destptr mem)
// cond:
// result: (MOVWstorezero [8] destptr (MOVDstorezero [0] destptr mem))
for {
if v.AuxInt != 12 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVWstorezero)
v.AuxInt = 8
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v0.AuxInt = 0
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [16] destptr mem)
// cond:
// result: (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))
for {
if v.AuxInt != 16 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%8 == 0) {
break
}
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = 8
v.AddArg(destptr)
@@ -9909,19 +9839,15 @@ func rewriteValuePPC64_OpZero(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (Zero [24] {t} destptr mem)
// cond: t.(Type).Alignment()%8 == 0
// match: (Zero [24] destptr mem)
// cond:
// result: (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))
for {
if v.AuxInt != 24 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%8 == 0) {
break
}
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = 16
v.AddArg(destptr)
@@ -9936,19 +9862,15 @@ func rewriteValuePPC64_OpZero(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (Zero [32] {t} destptr mem)
// cond: t.(Type).Alignment()%8 == 0
// match: (Zero [32] destptr mem)
// cond:
// result: (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))
for {
if v.AuxInt != 32 {
break
}
t := v.Aux
destptr := v.Args[0]
mem := v.Args[1]
if !(t.(Type).Alignment()%8 == 0) {
break
}
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = 24
v.AddArg(destptr)
@@ -9967,28 +9889,124 @@ func rewriteValuePPC64_OpZero(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (Zero [s] {t} ptr mem)
// cond: (s > 512 || config.noDuffDevice) || t.(Type).Alignment()%8 != 0
// result: (LoweredZero [t.(Type).Alignment()] ptr (ADDconst <ptr.Type> ptr [s-moveSize(t.(Type).Alignment(), config)]) mem)
// match: (Zero [40] destptr mem)
// cond:
// result: (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))))
for {
s := v.AuxInt
t := v.Aux
ptr := v.Args[0]
mem := v.Args[1]
if !((s > 512 || config.noDuffDevice) || t.(Type).Alignment()%8 != 0) {
if v.AuxInt != 40 {
break
}
v.reset(OpPPC64LoweredZero)
v.AuxInt = t.(Type).Alignment()
v.AddArg(ptr)
v0 := b.NewValue0(v.Pos, OpPPC64ADDconst, ptr.Type)
v0.AuxInt = s - moveSize(t.(Type).Alignment(), config)
v0.AddArg(ptr)
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = 32
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v0.AuxInt = 24
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v1.AuxInt = 16
v1.AddArg(destptr)
v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v2.AuxInt = 8
v2.AddArg(destptr)
v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v3.AuxInt = 0
v3.AddArg(destptr)
v3.AddArg(mem)
v2.AddArg(v3)
v1.AddArg(v2)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [48] destptr mem)
// cond:
// result: (MOVDstorezero [40] destptr (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))))
for {
if v.AuxInt != 48 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = 40
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v0.AuxInt = 32
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v1.AuxInt = 24
v1.AddArg(destptr)
v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v2.AuxInt = 16
v2.AddArg(destptr)
v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v3.AuxInt = 8
v3.AddArg(destptr)
v4 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v4.AuxInt = 0
v4.AddArg(destptr)
v4.AddArg(mem)
v3.AddArg(v4)
v2.AddArg(v3)
v1.AddArg(v2)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [56] destptr mem)
// cond:
// result: (MOVDstorezero [48] destptr (MOVDstorezero [40] destptr (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))))))
for {
if v.AuxInt != 56 {
break
}
destptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = 48
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v0.AuxInt = 40
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v1.AuxInt = 32
v1.AddArg(destptr)
v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v2.AuxInt = 24
v2.AddArg(destptr)
v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v3.AuxInt = 16
v3.AddArg(destptr)
v4 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v4.AuxInt = 8
v4.AddArg(destptr)
v5 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
v5.AuxInt = 0
v5.AddArg(destptr)
v5.AddArg(mem)
v4.AddArg(v5)
v3.AddArg(v4)
v2.AddArg(v3)
v1.AddArg(v2)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [s] ptr mem)
// cond:
// result: (LoweredZero [s] ptr mem)
for {
s := v.AuxInt
ptr := v.Args[0]
mem := v.Args[1]
v.reset(OpPPC64LoweredZero)
v.AuxInt = s
v.AddArg(ptr)
v.AddArg(mem)
return true
}
return false
}
func rewriteValuePPC64_OpZeroExt16to32(v *Value) bool {
// match: (ZeroExt16to32 x)
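Taken together, the rewritten rules and the new code generator mean a zeroing assignment follows one of two paths: the small fixed sizes are expanded inline into store-zero instructions, and everything else becomes a single LoweredZero that the ppc64 backend turns into the CTR-based 32-byte loop plus remainder stores. A rough illustration in ordinary Go (actual compiler output depends on type layout and optimization):

package main

type small struct{ a, b, c int64 } // 24 bytes: expanded inline (three MOVDstorezero, per the rules)

func clearSmall(s *small) { *s = small{} }

func clearBig(b *[1024]byte) { *b = [1024]byte{} } // 1024 bytes: lowered to LoweredZero's loop

func main() {
	var s small
	var b [1024]byte
	clearSmall(&s)
	clearBig(&b)
	_ = s
	_ = b
}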