mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: use generated loops instead of DUFFZERO on arm64
Change-Id: Ie0c8263f36d1bcfd0edfc4ea6710ae6c113c4d48
Reviewed-on: https://go-review.googlesource.com/c/go/+/678995
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
parent
ec9e1176c3
commit
28aa529c99
5 changed files with 187 additions and 222 deletions
|
|
@ -1050,33 +1050,118 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
p.From.Offset = int64(condCode)
|
p.From.Offset = int64(condCode)
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = v.Reg()
|
p.To.Reg = v.Reg()
|
||||||
case ssa.OpARM64DUFFZERO:
|
|
||||||
// runtime.duffzero expects start address in R20
|
|
||||||
p := s.Prog(obj.ADUFFZERO)
|
|
||||||
p.To.Type = obj.TYPE_MEM
|
|
||||||
p.To.Name = obj.NAME_EXTERN
|
|
||||||
p.To.Sym = ir.Syms.Duffzero
|
|
||||||
p.To.Offset = v.AuxInt
|
|
||||||
case ssa.OpARM64LoweredZero:
|
case ssa.OpARM64LoweredZero:
|
||||||
// STP.P (ZR,ZR), 16(R16)
|
ptrReg := v.Args[0].Reg()
|
||||||
// CMP Rarg1, R16
|
n := v.AuxInt
|
||||||
// BLE -2(PC)
|
if n < 16 {
|
||||||
// arg1 is the address of the last 16-byte unit to zero
|
v.Fatalf("Zero too small %d", n)
|
||||||
p := s.Prog(arm64.ASTP)
|
}
|
||||||
p.Scond = arm64.C_XPOST
|
|
||||||
p.From.Type = obj.TYPE_REGREG
|
// Generate zeroing instructions.
|
||||||
p.From.Reg = arm64.REGZERO
|
var off int64
|
||||||
p.From.Offset = int64(arm64.REGZERO)
|
for n >= 16 {
|
||||||
p.To.Type = obj.TYPE_MEM
|
// STP (ZR, ZR), off(ptrReg)
|
||||||
p.To.Reg = arm64.REG_R16
|
zero16(s, ptrReg, off, false)
|
||||||
p.To.Offset = 16
|
off += 16
|
||||||
p2 := s.Prog(arm64.ACMP)
|
n -= 16
|
||||||
p2.From.Type = obj.TYPE_REG
|
}
|
||||||
p2.From.Reg = v.Args[1].Reg()
|
// Write any fractional portion.
|
||||||
p2.Reg = arm64.REG_R16
|
// An overlapping 16-byte write can't be used here
|
||||||
p3 := s.Prog(arm64.ABLE)
|
// because STP's offsets must be a multiple of 8.
|
||||||
p3.To.Type = obj.TYPE_BRANCH
|
if n > 8 {
|
||||||
p3.To.SetTarget(p)
|
// MOVD ZR, off(ptrReg)
|
||||||
|
zero8(s, ptrReg, off)
|
||||||
|
off += 8
|
||||||
|
n -= 8
|
||||||
|
}
|
||||||
|
if n != 0 {
|
||||||
|
// MOVD ZR, off+n-8(ptrReg)
|
||||||
|
// TODO: for n<=4 we could use a smaller write.
|
||||||
|
zero8(s, ptrReg, off+n-8)
|
||||||
|
}
|
||||||
|
case ssa.OpARM64LoweredZeroLoop:
|
||||||
|
ptrReg := v.Args[0].Reg()
|
||||||
|
countReg := v.RegTmp()
|
||||||
|
n := v.AuxInt
|
||||||
|
loopSize := int64(64)
|
||||||
|
if n < 3*loopSize {
|
||||||
|
// - a loop count of 0 won't work.
|
||||||
|
// - a loop count of 1 is useless.
|
||||||
|
// - a loop count of 2 is a code size ~tie
|
||||||
|
// 3 instructions to implement the loop
|
||||||
|
// 4 instructions in the loop body
|
||||||
|
// vs
|
||||||
|
// 8 instructions in the straightline code
|
||||||
|
// Might as well use straightline code.
|
||||||
|
v.Fatalf("ZeroLoop size too small %d", n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put iteration count in a register.
|
||||||
|
// MOVD $(n/loopSize), countReg
|
||||||
|
p := s.Prog(arm64.AMOVD)
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = n / loopSize
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = countReg
|
||||||
|
cntInit := p
|
||||||
|
|
||||||
|
// Zero loopSize bytes starting at ptrReg.
|
||||||
|
// Increment ptrReg by loopSize as a side effect.
|
||||||
|
for range loopSize / 16 {
|
||||||
|
// STP.P (ZR, ZR), 16(ptrReg)
|
||||||
|
zero16(s, ptrReg, 0, true)
|
||||||
|
// TODO: should we use the postincrement form,
|
||||||
|
// or use a separate += 64 instruction?
|
||||||
|
// postincrement saves an instruction, but maybe
|
||||||
|
// it requires more integer units to do the +=16s.
|
||||||
|
}
|
||||||
|
// Decrement loop count.
|
||||||
|
// SUB $1, countReg
|
||||||
|
p = s.Prog(arm64.ASUB)
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = 1
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = countReg
|
||||||
|
// Jump to loop header if we're not done yet.
|
||||||
|
// CBNZ head
|
||||||
|
p = s.Prog(arm64.ACBNZ)
|
||||||
|
p.From.Type = obj.TYPE_REG
|
||||||
|
p.From.Reg = countReg
|
||||||
|
p.To.Type = obj.TYPE_BRANCH
|
||||||
|
p.To.SetTarget(cntInit.Link)
|
||||||
|
|
||||||
|
// Multiples of the loop size are now done.
|
||||||
|
n %= loopSize
|
||||||
|
|
||||||
|
// Write any fractional portion.
|
||||||
|
var off int64
|
||||||
|
for n >= 16 {
|
||||||
|
// STP (ZR, ZR), off(ptrReg)
|
||||||
|
zero16(s, ptrReg, off, false)
|
||||||
|
off += 16
|
||||||
|
n -= 16
|
||||||
|
}
|
||||||
|
if n > 8 {
|
||||||
|
// Note: an overlapping 16-byte write can't be used
|
||||||
|
// here because STP's offsets must be a multiple of 8.
|
||||||
|
// MOVD ZR, off(ptrReg)
|
||||||
|
zero8(s, ptrReg, off)
|
||||||
|
off += 8
|
||||||
|
n -= 8
|
||||||
|
}
|
||||||
|
if n != 0 {
|
||||||
|
// MOVD ZR, off+n-8(ptrReg)
|
||||||
|
// TODO: for n<=4 we could use a smaller write.
|
||||||
|
zero8(s, ptrReg, off+n-8)
|
||||||
|
}
|
||||||
|
// TODO: maybe we should use the count register to instead
|
||||||
|
// hold an end pointer and compare against that?
|
||||||
|
// ADD $n, ptrReg, endReg
|
||||||
|
// then
|
||||||
|
// CMP ptrReg, endReg
|
||||||
|
// BNE loop
|
||||||
|
// There's a past-the-end pointer here, any problem with that?
|
||||||
|
|
||||||
case ssa.OpARM64DUFFCOPY:
|
case ssa.OpARM64DUFFCOPY:
|
||||||
p := s.Prog(obj.ADUFFCOPY)
|
p := s.Prog(obj.ADUFFCOPY)
|
||||||
p.To.Type = obj.TYPE_MEM
|
p.To.Type = obj.TYPE_MEM
|
||||||
|
|
@ -1482,3 +1567,35 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
|
||||||
p.Pos = p.Pos.WithNotStmt()
|
p.Pos = p.Pos.WithNotStmt()
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// zero16 emits one STP (ZR, ZR) store, created with s.Prog, that zeroes the 16 bytes at reg+off.
|
||||||
|
// If postInc is true, off must be 0 (zero16 panics otherwise) and the post-indexed form STP.P is emitted instead, which also increments reg by 16.
|
||||||
|
func zero16(s *ssagen.State, reg int16, off int64, postInc bool) {
|
||||||
|
// Default form, used unless postInc overrides it below: STP (ZR, ZR), off(reg)
|
||||||
|
p := s.Prog(arm64.ASTP)
|
||||||
|
p.From.Type = obj.TYPE_REGREG
|
||||||
|
p.From.Reg = arm64.REGZERO // first register of the stored pair
|
||||||
|
p.From.Offset = int64(arm64.REGZERO) // a TYPE_REGREG operand carries the second register of the pair in Offset
|
||||||
|
p.To.Type = obj.TYPE_MEM
|
||||||
|
p.To.Reg = reg
|
||||||
|
p.To.Offset = off
|
||||||
|
if postInc {
|
||||||
|
if off != 0 {
|
||||||
|
panic("can't postinc with non-zero offset")
|
||||||
|
}
|
||||||
|
// Switch to the post-indexed form: STP.P (ZR, ZR), 16(reg)
|
||||||
|
p.Scond = arm64.C_XPOST
|
||||||
|
p.To.Offset = 16 // in the post-indexed form this is the amount added to reg after the store
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// zero8 emits one MOVD ZR, off(reg) store, created with s.Prog, that zeroes the 8 bytes at reg+off. No post-increment condition is set on the instruction.
|
||||||
|
func zero8(s *ssagen.State, reg int16, off int64) {
|
||||||
|
// MOVD ZR, off(reg)
|
||||||
|
p := s.Prog(arm64.AMOVD)
|
||||||
|
p.From.Type = obj.TYPE_REG
|
||||||
|
p.From.Reg = arm64.REGZERO // source is the zero register
|
||||||
|
p.To.Type = obj.TYPE_MEM
|
||||||
|
p.To.Reg = reg
|
||||||
|
p.To.Offset = off
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -392,44 +392,8 @@
|
||||||
(Zero [16] ptr mem) =>
|
(Zero [16] ptr mem) =>
|
||||||
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
|
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
|
||||||
|
|
||||||
(Zero [32] ptr mem) =>
|
(Zero [s] ptr mem) && s > 16 && s < 192 => (LoweredZero [s] ptr mem)
|
||||||
(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
|
(Zero [s] ptr mem) && s >= 192 => (LoweredZeroLoop [s] ptr mem)
|
||||||
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
|
|
||||||
|
|
||||||
(Zero [48] ptr mem) =>
|
|
||||||
(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
|
|
||||||
(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
|
|
||||||
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
|
|
||||||
|
|
||||||
(Zero [64] ptr mem) =>
|
|
||||||
(STP [48] ptr (MOVDconst [0]) (MOVDconst [0])
|
|
||||||
(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
|
|
||||||
(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
|
|
||||||
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
|
|
||||||
|
|
||||||
// strip off fractional word zeroing
|
|
||||||
(Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
|
|
||||||
(Zero [8]
|
|
||||||
(OffPtr <ptr.Type> ptr [s-8])
|
|
||||||
(Zero [s-s%16] ptr mem))
|
|
||||||
(Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 =>
|
|
||||||
(Zero [16]
|
|
||||||
(OffPtr <ptr.Type> ptr [s-16])
|
|
||||||
(Zero [s-s%16] ptr mem))
|
|
||||||
|
|
||||||
// medium zeroing uses a duff device
|
|
||||||
// 4, 16, and 64 are magic constants, see runtime/mkduff.go
|
|
||||||
(Zero [s] ptr mem)
|
|
||||||
&& s%16 == 0 && s > 64 && s <= 16*64 =>
|
|
||||||
(DUFFZERO [4 * (64 - s/16)] ptr mem)
|
|
||||||
|
|
||||||
// large zeroing uses a loop
|
|
||||||
(Zero [s] ptr mem)
|
|
||||||
&& s%16 == 0 && s > 16*64 =>
|
|
||||||
(LoweredZero
|
|
||||||
ptr
|
|
||||||
(ADDconst <ptr.Type> [s-16] ptr)
|
|
||||||
mem)
|
|
||||||
|
|
||||||
// moves
|
// moves
|
||||||
(Move [0] _ _ mem) => mem
|
(Move [0] _ _ mem) => mem
|
||||||
|
|
|
||||||
|
|
@ -536,44 +536,36 @@ func init() {
|
||||||
{name: "LessThanNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x<y but without honoring overflow, false otherwise.
|
{name: "LessThanNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x<y but without honoring overflow, false otherwise.
|
||||||
{name: "GreaterEqualNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x>=y but without honoring overflow, false otherwise.
|
{name: "GreaterEqualNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x>=y but without honoring overflow, false otherwise.
|
||||||
|
|
||||||
// duffzero
|
// medium zeroing
|
||||||
// arg0 = address of memory to zero
|
// arg0 = address of memory to zero
|
||||||
// arg1 = mem
|
// arg1 = mem
|
||||||
// auxint = offset into duffzero code to start executing
|
// auxint = # of bytes to zero
|
||||||
// returns mem
|
// returns mem
|
||||||
// R20 changed as side effect
|
|
||||||
// R16 and R17 may be clobbered by linker trampoline.
|
|
||||||
{
|
{
|
||||||
name: "DUFFZERO",
|
name: "LoweredZero",
|
||||||
aux: "Int64",
|
aux: "Int64",
|
||||||
argLength: 2,
|
argLength: 2,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []regMask{buildReg("R20")},
|
inputs: []regMask{gp},
|
||||||
clobbers: buildReg("R16 R17 R20 R30"),
|
|
||||||
},
|
},
|
||||||
//faultOnNilArg0: true, // Note: removed for 73748. TODO: reenable at some point
|
faultOnNilArg0: true,
|
||||||
unsafePoint: true, // FP maintenance around DUFFZERO can be clobbered by interrupts
|
|
||||||
},
|
},
|
||||||
|
|
||||||
// large zeroing
|
// large zeroing
|
||||||
// arg0 = address of memory to zero (in R16 aka arm64.REGRT1, changed as side effect)
|
// arg0 = address of memory to zero
|
||||||
// arg1 = address of the last 16-byte unit to zero
|
// arg1 = mem
|
||||||
// arg2 = mem
|
// auxint = # of bytes to zero
|
||||||
// returns mem
|
// returns mem
|
||||||
// STP.P (ZR,ZR), 16(R16)
|
|
||||||
// CMP Rarg1, R16
|
|
||||||
// BLE -2(PC)
|
|
||||||
// Note: the-end-of-the-memory may be not a valid pointer. it's a problem if it is spilled.
|
|
||||||
// the-end-of-the-memory - 16 is with the area to zero, ok to spill.
|
|
||||||
{
|
{
|
||||||
name: "LoweredZero",
|
name: "LoweredZeroLoop",
|
||||||
argLength: 3,
|
aux: "Int64",
|
||||||
|
argLength: 2,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []regMask{buildReg("R16"), gp},
|
inputs: []regMask{gp},
|
||||||
clobbers: buildReg("R16"),
|
clobbersArg0: true,
|
||||||
},
|
},
|
||||||
clobberFlags: true,
|
|
||||||
faultOnNilArg0: true,
|
faultOnNilArg0: true,
|
||||||
|
needIntTemp: true,
|
||||||
},
|
},
|
||||||
|
|
||||||
// duffcopy
|
// duffcopy
|
||||||
|
|
|
||||||
|
|
@ -1718,8 +1718,8 @@ const (
|
||||||
OpARM64NotGreaterEqualF
|
OpARM64NotGreaterEqualF
|
||||||
OpARM64LessThanNoov
|
OpARM64LessThanNoov
|
||||||
OpARM64GreaterEqualNoov
|
OpARM64GreaterEqualNoov
|
||||||
OpARM64DUFFZERO
|
|
||||||
OpARM64LoweredZero
|
OpARM64LoweredZero
|
||||||
|
OpARM64LoweredZeroLoop
|
||||||
OpARM64DUFFCOPY
|
OpARM64DUFFCOPY
|
||||||
OpARM64LoweredMove
|
OpARM64LoweredMove
|
||||||
OpARM64LoweredGetClosurePtr
|
OpARM64LoweredGetClosurePtr
|
||||||
|
|
@ -23068,29 +23068,28 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "DUFFZERO",
|
|
||||||
auxType: auxInt64,
|
|
||||||
argLen: 2,
|
|
||||||
unsafePoint: true,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{0, 524288}, // R20
|
|
||||||
},
|
|
||||||
clobbers: 269156352, // R16 R17 R20 R30
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "LoweredZero",
|
name: "LoweredZero",
|
||||||
argLen: 3,
|
auxType: auxInt64,
|
||||||
clobberFlags: true,
|
argLen: 2,
|
||||||
faultOnNilArg0: true,
|
faultOnNilArg0: true,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []inputInfo{
|
inputs: []inputInfo{
|
||||||
{0, 65536}, // R16
|
{0, 335544319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
|
||||||
{1, 335544319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
|
|
||||||
},
|
},
|
||||||
clobbers: 65536, // R16
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "LoweredZeroLoop",
|
||||||
|
auxType: auxInt64,
|
||||||
|
argLen: 2,
|
||||||
|
needIntTemp: true,
|
||||||
|
faultOnNilArg0: true,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 335544319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
|
||||||
|
},
|
||||||
|
clobbersArg0: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -22321,141 +22321,34 @@ func rewriteValueARM64_OpZero(v *Value) bool {
|
||||||
v.AddArg4(ptr, v0, v0, mem)
|
v.AddArg4(ptr, v0, v0, mem)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (Zero [32] ptr mem)
|
|
||||||
// result: (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
|
|
||||||
for {
|
|
||||||
if auxIntToInt64(v.AuxInt) != 32 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
ptr := v_0
|
|
||||||
mem := v_1
|
|
||||||
v.reset(OpARM64STP)
|
|
||||||
v.AuxInt = int32ToAuxInt(16)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
|
|
||||||
v0.AuxInt = int64ToAuxInt(0)
|
|
||||||
v1 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
|
|
||||||
v1.AuxInt = int32ToAuxInt(0)
|
|
||||||
v1.AddArg4(ptr, v0, v0, mem)
|
|
||||||
v.AddArg4(ptr, v0, v0, v1)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// match: (Zero [48] ptr mem)
|
|
||||||
// result: (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
|
|
||||||
for {
|
|
||||||
if auxIntToInt64(v.AuxInt) != 48 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
ptr := v_0
|
|
||||||
mem := v_1
|
|
||||||
v.reset(OpARM64STP)
|
|
||||||
v.AuxInt = int32ToAuxInt(32)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
|
|
||||||
v0.AuxInt = int64ToAuxInt(0)
|
|
||||||
v1 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
|
|
||||||
v1.AuxInt = int32ToAuxInt(16)
|
|
||||||
v2 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
|
|
||||||
v2.AuxInt = int32ToAuxInt(0)
|
|
||||||
v2.AddArg4(ptr, v0, v0, mem)
|
|
||||||
v1.AddArg4(ptr, v0, v0, v2)
|
|
||||||
v.AddArg4(ptr, v0, v0, v1)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// match: (Zero [64] ptr mem)
|
|
||||||
// result: (STP [48] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
|
|
||||||
for {
|
|
||||||
if auxIntToInt64(v.AuxInt) != 64 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
ptr := v_0
|
|
||||||
mem := v_1
|
|
||||||
v.reset(OpARM64STP)
|
|
||||||
v.AuxInt = int32ToAuxInt(48)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
|
|
||||||
v0.AuxInt = int64ToAuxInt(0)
|
|
||||||
v1 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
|
|
||||||
v1.AuxInt = int32ToAuxInt(32)
|
|
||||||
v2 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
|
|
||||||
v2.AuxInt = int32ToAuxInt(16)
|
|
||||||
v3 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
|
|
||||||
v3.AuxInt = int32ToAuxInt(0)
|
|
||||||
v3.AddArg4(ptr, v0, v0, mem)
|
|
||||||
v2.AddArg4(ptr, v0, v0, v3)
|
|
||||||
v1.AddArg4(ptr, v0, v0, v2)
|
|
||||||
v.AddArg4(ptr, v0, v0, v1)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// match: (Zero [s] ptr mem)
|
// match: (Zero [s] ptr mem)
|
||||||
// cond: s%16 != 0 && s%16 <= 8 && s > 16
|
// cond: s > 16 && s < 192
|
||||||
// result: (Zero [8] (OffPtr <ptr.Type> ptr [s-8]) (Zero [s-s%16] ptr mem))
|
// result: (LoweredZero [s] ptr mem)
|
||||||
for {
|
for {
|
||||||
s := auxIntToInt64(v.AuxInt)
|
s := auxIntToInt64(v.AuxInt)
|
||||||
ptr := v_0
|
ptr := v_0
|
||||||
mem := v_1
|
mem := v_1
|
||||||
if !(s%16 != 0 && s%16 <= 8 && s > 16) {
|
if !(s > 16 && s < 192) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
v.reset(OpZero)
|
v.reset(OpARM64LoweredZero)
|
||||||
v.AuxInt = int64ToAuxInt(8)
|
v.AuxInt = int64ToAuxInt(s)
|
||||||
v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
|
|
||||||
v0.AuxInt = int64ToAuxInt(s - 8)
|
|
||||||
v0.AddArg(ptr)
|
|
||||||
v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
|
|
||||||
v1.AuxInt = int64ToAuxInt(s - s%16)
|
|
||||||
v1.AddArg2(ptr, mem)
|
|
||||||
v.AddArg2(v0, v1)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// match: (Zero [s] ptr mem)
|
|
||||||
// cond: s%16 != 0 && s%16 > 8 && s > 16
|
|
||||||
// result: (Zero [16] (OffPtr <ptr.Type> ptr [s-16]) (Zero [s-s%16] ptr mem))
|
|
||||||
for {
|
|
||||||
s := auxIntToInt64(v.AuxInt)
|
|
||||||
ptr := v_0
|
|
||||||
mem := v_1
|
|
||||||
if !(s%16 != 0 && s%16 > 8 && s > 16) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
v.reset(OpZero)
|
|
||||||
v.AuxInt = int64ToAuxInt(16)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
|
|
||||||
v0.AuxInt = int64ToAuxInt(s - 16)
|
|
||||||
v0.AddArg(ptr)
|
|
||||||
v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
|
|
||||||
v1.AuxInt = int64ToAuxInt(s - s%16)
|
|
||||||
v1.AddArg2(ptr, mem)
|
|
||||||
v.AddArg2(v0, v1)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// match: (Zero [s] ptr mem)
|
|
||||||
// cond: s%16 == 0 && s > 64 && s <= 16*64
|
|
||||||
// result: (DUFFZERO [4 * (64 - s/16)] ptr mem)
|
|
||||||
for {
|
|
||||||
s := auxIntToInt64(v.AuxInt)
|
|
||||||
ptr := v_0
|
|
||||||
mem := v_1
|
|
||||||
if !(s%16 == 0 && s > 64 && s <= 16*64) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
v.reset(OpARM64DUFFZERO)
|
|
||||||
v.AuxInt = int64ToAuxInt(4 * (64 - s/16))
|
|
||||||
v.AddArg2(ptr, mem)
|
v.AddArg2(ptr, mem)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (Zero [s] ptr mem)
|
// match: (Zero [s] ptr mem)
|
||||||
// cond: s%16 == 0 && s > 16*64
|
// cond: s >= 192
|
||||||
// result: (LoweredZero ptr (ADDconst <ptr.Type> [s-16] ptr) mem)
|
// result: (LoweredZeroLoop [s] ptr mem)
|
||||||
for {
|
for {
|
||||||
s := auxIntToInt64(v.AuxInt)
|
s := auxIntToInt64(v.AuxInt)
|
||||||
ptr := v_0
|
ptr := v_0
|
||||||
mem := v_1
|
mem := v_1
|
||||||
if !(s%16 == 0 && s > 16*64) {
|
if !(s >= 192) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
v.reset(OpARM64LoweredZero)
|
v.reset(OpARM64LoweredZeroLoop)
|
||||||
v0 := b.NewValue0(v.Pos, OpARM64ADDconst, ptr.Type)
|
v.AuxInt = int64ToAuxInt(s)
|
||||||
v0.AuxInt = int64ToAuxInt(s - 16)
|
v.AddArg2(ptr, mem)
|
||||||
v0.AddArg(ptr)
|
|
||||||
v.AddArg3(ptr, v0, mem)
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue