mirror of
https://github.com/golang/go.git
synced 2025-10-26 06:14:13 +00:00
cmd/compile: use generated loops instead of DUFFCOPY on arm64
Change-Id: Ic2aa8959b7fc594b86def70b6c2be38badf7970c
Reviewed-on: https://go-review.googlesource.com/c/go/+/679015
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
This commit is contained in:
parent
bca3e98b8a
commit
15d6dbc05c
5 changed files with 220 additions and 178 deletions
|
|
@ -1162,41 +1162,119 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
// BNE loop
|
// BNE loop
|
||||||
// There's a past-the-end pointer here, any problem with that?
|
// There's a past-the-end pointer here, any problem with that?
|
||||||
|
|
||||||
case ssa.OpARM64DUFFCOPY:
|
|
||||||
p := s.Prog(obj.ADUFFCOPY)
|
|
||||||
p.To.Type = obj.TYPE_MEM
|
|
||||||
p.To.Name = obj.NAME_EXTERN
|
|
||||||
p.To.Sym = ir.Syms.Duffcopy
|
|
||||||
p.To.Offset = v.AuxInt
|
|
||||||
case ssa.OpARM64LoweredMove:
|
case ssa.OpARM64LoweredMove:
|
||||||
// LDP.P 16(R16), (R25, Rtmp)
|
dstReg := v.Args[0].Reg()
|
||||||
// STP.P (R25, Rtmp), 16(R17)
|
srcReg := v.Args[1].Reg()
|
||||||
// CMP Rarg2, R16
|
if dstReg == srcReg {
|
||||||
// BLE -3(PC)
|
break
|
||||||
// arg2 is the address of the last element of src
|
}
|
||||||
p := s.Prog(arm64.ALDP)
|
tmpReg1 := int16(arm64.REG_R24)
|
||||||
p.Scond = arm64.C_XPOST
|
tmpReg2 := int16(arm64.REG_R25)
|
||||||
p.From.Type = obj.TYPE_MEM
|
n := v.AuxInt
|
||||||
p.From.Reg = arm64.REG_R16
|
if n < 16 {
|
||||||
p.From.Offset = 16
|
v.Fatalf("Move too small %d", n)
|
||||||
p.To.Type = obj.TYPE_REGREG
|
}
|
||||||
p.To.Reg = arm64.REG_R25
|
|
||||||
p.To.Offset = int64(arm64.REGTMP)
|
// Generate copying instructions.
|
||||||
p2 := s.Prog(arm64.ASTP)
|
var off int64
|
||||||
p2.Scond = arm64.C_XPOST
|
for n >= 16 {
|
||||||
p2.From.Type = obj.TYPE_REGREG
|
// LDP off(srcReg), (tmpReg1, tmpReg2)
|
||||||
p2.From.Reg = arm64.REG_R25
|
// STP (tmpReg1, tmpReg2), off(dstReg)
|
||||||
p2.From.Offset = int64(arm64.REGTMP)
|
move16(s, srcReg, dstReg, tmpReg1, tmpReg2, off, false)
|
||||||
p2.To.Type = obj.TYPE_MEM
|
off += 16
|
||||||
p2.To.Reg = arm64.REG_R17
|
n -= 16
|
||||||
p2.To.Offset = 16
|
}
|
||||||
p3 := s.Prog(arm64.ACMP)
|
if n > 8 {
|
||||||
p3.From.Type = obj.TYPE_REG
|
// MOVD off(srcReg), tmpReg1
|
||||||
p3.From.Reg = v.Args[2].Reg()
|
// MOVD tmpReg1, off(dstReg)
|
||||||
p3.Reg = arm64.REG_R16
|
move8(s, srcReg, dstReg, tmpReg1, off)
|
||||||
p4 := s.Prog(arm64.ABLE)
|
off += 8
|
||||||
p4.To.Type = obj.TYPE_BRANCH
|
n -= 8
|
||||||
p4.To.SetTarget(p)
|
}
|
||||||
|
if n != 0 {
|
||||||
|
// MOVD off+n-8(srcReg), tmpReg1
|
||||||
|
// MOVD tmpReg1, off+n-8(dstReg)
|
||||||
|
move8(s, srcReg, dstReg, tmpReg1, off+n-8)
|
||||||
|
}
|
||||||
|
case ssa.OpARM64LoweredMoveLoop:
	// Copies v.AuxInt bytes from the address in Args[1] to the address in
	// Args[0]: a counted loop moves loopSize bytes per iteration, then
	// straight-line code moves whatever is left over.
	dstReg := v.Args[0].Reg()
	srcReg := v.Args[1].Reg()
	if dstReg == srcReg {
		// Both pointers live in the same register, so source and
		// destination are the same address: nothing to copy.
		break
	}
	countReg := int16(arm64.REG_R23)
	tmpReg1 := int16(arm64.REG_R24)
	tmpReg2 := int16(arm64.REG_R25)
	n := v.AuxInt
	loopSize := int64(64)
	if n < 3*loopSize {
		// - a loop count of 0 won't work.
		// - a loop count of 1 is useless.
		// - a loop count of 2 is a code size ~tie
		//     3 instructions to implement the loop
		//     4 instructions in the loop body
		//   vs
		//     8 instructions in the straightline code
		//   Might as well use straightline code.
		v.Fatalf("MoveLoop size too small %d", n)
	}

	// Put iteration count in a register.
	//   MOVD $(n/loopSize), countReg
	p := s.Prog(arm64.AMOVD)
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = n / loopSize
	p.To.Type = obj.TYPE_REG
	p.To.Reg = countReg
	// Remember the counter initialization; the instruction that follows it
	// is the loop head the backward branch targets.
	cntInit := p

	// Move loopSize bytes starting at srcReg to dstReg.
	// Increment srcReg and dstReg by loopSize as a side effect.
	for range loopSize / 16 {
		// LDP.P 16(srcReg), (tmpReg1, tmpReg2)
		// STP.P (tmpReg1, tmpReg2), 16(dstReg)
		move16(s, srcReg, dstReg, tmpReg1, tmpReg2, 0, true)
	}
	// Decrement loop count.
	//   SUB $1, countReg
	p = s.Prog(arm64.ASUB)
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = 1
	p.To.Type = obj.TYPE_REG
	p.To.Reg = countReg
	// Jump to loop header if we're not done yet.
	//   CBNZ head
	p = s.Prog(arm64.ACBNZ)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = countReg
	p.To.Type = obj.TYPE_BRANCH
	p.To.SetTarget(cntInit.Link)

	// Multiples of the loop size are now done.
	n %= loopSize

	// Copy any fractional portion.
	// srcReg and dstReg were advanced by the loop, so offsets restart at 0.
	var off int64
	for n >= 16 {
		// LDP off(srcReg), (tmpReg1, tmpReg2)
		// STP (tmpReg1, tmpReg2), off(dstReg)
		move16(s, srcReg, dstReg, tmpReg1, tmpReg2, off, false)
		off += 16
		n -= 16
	}
	if n > 8 {
		// MOVD off(srcReg), tmpReg1
		// MOVD tmpReg1, off(dstReg)
		move8(s, srcReg, dstReg, tmpReg1, off)
		off += 8
		n -= 8
	}
	if n != 0 {
		// Fewer than 8 bytes may remain, so move the final 8 bytes of the
		// region; this may overlap bytes that were already copied.
		// MOVD off+n-8(srcReg), tmpReg1
		// MOVD tmpReg1, off+n-8(dstReg)
		move8(s, srcReg, dstReg, tmpReg1, off+n-8)
	}
|
||||||
|
|
||||||
case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
|
case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
|
||||||
s.Call(v)
|
s.Call(v)
|
||||||
case ssa.OpARM64CALLtail:
|
case ssa.OpARM64CALLtail:
|
||||||
|
|
@ -1599,3 +1677,53 @@ func zero8(s *ssagen.State, reg int16, off int64) {
|
||||||
p.To.Reg = reg
|
p.To.Reg = reg
|
||||||
p.To.Offset = off
|
p.To.Offset = off
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// move16 copies 16 bytes at src+off to dst+off.
|
||||||
|
// Uses registers tmp1 and tmp2.
|
||||||
|
// If postInc is true, increment src and dst by 16.
|
||||||
|
func move16(s *ssagen.State, src, dst, tmp1, tmp2 int16, off int64, postInc bool) {
|
||||||
|
// LDP off(src), (tmp1, tmp2)
|
||||||
|
ld := s.Prog(arm64.ALDP)
|
||||||
|
ld.From.Type = obj.TYPE_MEM
|
||||||
|
ld.From.Reg = src
|
||||||
|
ld.From.Offset = off
|
||||||
|
ld.To.Type = obj.TYPE_REGREG
|
||||||
|
ld.To.Reg = tmp1
|
||||||
|
ld.To.Offset = int64(tmp2)
|
||||||
|
// STP (tmp1, tmp2), off(dst)
|
||||||
|
st := s.Prog(arm64.ASTP)
|
||||||
|
st.From.Type = obj.TYPE_REGREG
|
||||||
|
st.From.Reg = tmp1
|
||||||
|
st.From.Offset = int64(tmp2)
|
||||||
|
st.To.Type = obj.TYPE_MEM
|
||||||
|
st.To.Reg = dst
|
||||||
|
st.To.Offset = off
|
||||||
|
if postInc {
|
||||||
|
if off != 0 {
|
||||||
|
panic("can't postinc with non-zero offset")
|
||||||
|
}
|
||||||
|
ld.Scond = arm64.C_XPOST
|
||||||
|
st.Scond = arm64.C_XPOST
|
||||||
|
ld.From.Offset = 16
|
||||||
|
st.To.Offset = 16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// move8 copies 8 bytes at src+off to dst+off.
|
||||||
|
// Uses register tmp.
|
||||||
|
func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
|
||||||
|
// MOVD off(src), tmp
|
||||||
|
ld := s.Prog(arm64.AMOVD)
|
||||||
|
ld.From.Type = obj.TYPE_MEM
|
||||||
|
ld.From.Reg = src
|
||||||
|
ld.From.Offset = off
|
||||||
|
ld.To.Type = obj.TYPE_REG
|
||||||
|
ld.To.Reg = tmp
|
||||||
|
// MOVD tmp, off(dst)
|
||||||
|
st := s.Prog(arm64.AMOVD)
|
||||||
|
st.From.Type = obj.TYPE_REG
|
||||||
|
st.From.Reg = tmp
|
||||||
|
st.To.Type = obj.TYPE_MEM
|
||||||
|
st.To.Reg = dst
|
||||||
|
st.To.Offset = off
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -462,39 +462,8 @@
|
||||||
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
|
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
|
||||||
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
|
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
|
||||||
|
|
||||||
// strip off fractional word move
|
(Move [s] dst src mem) && s > 64 && s < 192 && logLargeCopy(v, s) => (LoweredMove [s] dst src mem)
|
||||||
(Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 64 =>
|
(Move [s] dst src mem) && s >= 192 && logLargeCopy(v, s) => (LoweredMoveLoop [s] dst src mem)
|
||||||
(Move [8]
|
|
||||||
(OffPtr <dst.Type> dst [s-8])
|
|
||||||
(OffPtr <src.Type> src [s-8])
|
|
||||||
(Move [s-s%16] dst src mem))
|
|
||||||
(Move [s] dst src mem) && s%16 != 0 && s%16 > 8 && s > 64 =>
|
|
||||||
(Move [16]
|
|
||||||
(OffPtr <dst.Type> dst [s-16])
|
|
||||||
(OffPtr <src.Type> src [s-16])
|
|
||||||
(Move [s-s%16] dst src mem))
|
|
||||||
|
|
||||||
// medium move uses a duff device
|
|
||||||
(Move [s] dst src mem)
|
|
||||||
&& s > 64 && s <= 16*64 && s%16 == 0
|
|
||||||
&& logLargeCopy(v, s) =>
|
|
||||||
(DUFFCOPY [8 * (64 - s/16)] dst src mem)
|
|
||||||
// 8 is the number of bytes to encode:
|
|
||||||
//
|
|
||||||
// LDP.P 16(R16), (R26, R27)
|
|
||||||
// STP.P (R26, R27), 16(R17)
|
|
||||||
//
|
|
||||||
// 64 is number of these blocks. See runtime/duff_arm64.s:duffcopy
|
|
||||||
|
|
||||||
// large move uses a loop
|
|
||||||
(Move [s] dst src mem)
|
|
||||||
&& s%16 == 0 && s > 16*64
|
|
||||||
&& logLargeCopy(v, s) =>
|
|
||||||
(LoweredMove
|
|
||||||
dst
|
|
||||||
src
|
|
||||||
(ADDconst <src.Type> src [s-16])
|
|
||||||
mem)
|
|
||||||
|
|
||||||
// calls
|
// calls
|
||||||
(StaticCall ...) => (CALLstatic ...)
|
(StaticCall ...) => (CALLstatic ...)
|
||||||
|
|
|
||||||
|
|
@ -144,6 +144,8 @@ func init() {
|
||||||
gpspsbg = gpspg | buildReg("SB")
|
gpspsbg = gpspg | buildReg("SB")
|
||||||
fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31")
|
fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31")
|
||||||
callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
|
callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
|
||||||
|
r24to25 = buildReg("R24 R25")
|
||||||
|
r23to25 = buildReg("R23 R24 R25")
|
||||||
rz = buildReg("ZERO")
|
rz = buildReg("ZERO")
|
||||||
first16 = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15")
|
first16 = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15")
|
||||||
)
|
)
|
||||||
|
|
@ -568,47 +570,40 @@ func init() {
|
||||||
needIntTemp: true,
|
needIntTemp: true,
|
||||||
},
|
},
|
||||||
|
|
||||||
// duffcopy
|
// medium copying
|
||||||
// arg0 = address of dst memory (in R21, changed as side effect)
|
// arg0 = address of dst memory
|
||||||
// arg1 = address of src memory (in R20, changed as side effect)
|
// arg1 = address of src memory
|
||||||
// arg2 = mem
|
// arg2 = mem
|
||||||
// auxint = offset into duffcopy code to start executing
|
// auxint = # of bytes to copy
|
||||||
// returns mem
|
// returns mem
|
||||||
// R20, R21 changed as side effect
|
|
||||||
// R16 and R17 may be clobbered by linker trampoline.
|
|
||||||
{
|
{
|
||||||
name: "DUFFCOPY",
|
name: "LoweredMove",
|
||||||
aux: "Int64",
|
aux: "Int64",
|
||||||
argLength: 3,
|
argLength: 3,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []regMask{buildReg("R21"), buildReg("R20")},
|
inputs: []regMask{gp &^ r24to25, gp &^ r24to25},
|
||||||
clobbers: buildReg("R16 R17 R20 R21 R26 R30"),
|
clobbers: r24to25, // TODO: figure out needIntTemp x2
|
||||||
},
|
},
|
||||||
//faultOnNilArg0: true, // Note: removed for 73748. TODO: reenable at some point
|
faultOnNilArg0: true,
|
||||||
//faultOnNilArg1: true,
|
faultOnNilArg1: true,
|
||||||
unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts
|
|
||||||
},
|
},
|
||||||
|
|
||||||
// large move
|
// large copying
|
||||||
// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
|
// arg0 = address of dst memory
|
||||||
// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
|
// arg1 = address of src memory
|
||||||
// arg2 = address of the last element of src
|
// arg2 = mem
|
||||||
// arg3 = mem
|
// auxint = # of bytes to copy
|
||||||
// returns mem
|
// returns mem
|
||||||
// LDP.P 16(R16), (R25, Rtmp)
|
|
||||||
// STP.P (R25, Rtmp), 16(R17)
|
|
||||||
// CMP Rarg2, R16
|
|
||||||
// BLE -3(PC)
|
|
||||||
// Note: the-end-of-src may be not a valid pointer. it's a problem if it is spilled.
|
|
||||||
// the-end-of-src - 16 is within the area to copy, ok to spill.
|
|
||||||
{
|
{
|
||||||
name: "LoweredMove",
|
name: "LoweredMoveLoop",
|
||||||
argLength: 4,
|
aux: "Int64",
|
||||||
|
argLength: 3,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []regMask{buildReg("R17"), buildReg("R16"), gp &^ buildReg("R25")},
|
inputs: []regMask{gp &^ r23to25, gp &^ r23to25},
|
||||||
clobbers: buildReg("R16 R17 R25"),
|
clobbers: r23to25, // TODO: figure out needIntTemp x3
|
||||||
|
clobbersArg0: true,
|
||||||
|
clobbersArg1: true,
|
||||||
},
|
},
|
||||||
clobberFlags: true,
|
|
||||||
faultOnNilArg0: true,
|
faultOnNilArg0: true,
|
||||||
faultOnNilArg1: true,
|
faultOnNilArg1: true,
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1720,8 +1720,8 @@ const (
|
||||||
OpARM64GreaterEqualNoov
|
OpARM64GreaterEqualNoov
|
||||||
OpARM64LoweredZero
|
OpARM64LoweredZero
|
||||||
OpARM64LoweredZeroLoop
|
OpARM64LoweredZeroLoop
|
||||||
OpARM64DUFFCOPY
|
|
||||||
OpARM64LoweredMove
|
OpARM64LoweredMove
|
||||||
|
OpARM64LoweredMoveLoop
|
||||||
OpARM64LoweredGetClosurePtr
|
OpARM64LoweredGetClosurePtr
|
||||||
OpARM64LoweredGetCallerSP
|
OpARM64LoweredGetCallerSP
|
||||||
OpARM64LoweredGetCallerPC
|
OpARM64LoweredGetCallerPC
|
||||||
|
|
@ -23096,31 +23096,33 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "DUFFCOPY",
|
name: "LoweredMove",
|
||||||
auxType: auxInt64,
|
auxType: auxInt64,
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
unsafePoint: true,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{0, 1048576}, // R21
|
|
||||||
{1, 524288}, // R20
|
|
||||||
},
|
|
||||||
clobbers: 303759360, // R16 R17 R20 R21 R26 R30
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "LoweredMove",
|
|
||||||
argLen: 4,
|
|
||||||
clobberFlags: true,
|
|
||||||
faultOnNilArg0: true,
|
faultOnNilArg0: true,
|
||||||
faultOnNilArg1: true,
|
faultOnNilArg1: true,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []inputInfo{
|
inputs: []inputInfo{
|
||||||
{0, 131072}, // R17
|
{0, 310378495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R26 R30
|
||||||
{1, 65536}, // R16
|
{1, 310378495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R26 R30
|
||||||
{2, 318767103}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R26 R30
|
|
||||||
},
|
},
|
||||||
clobbers: 16973824, // R16 R17 R25
|
clobbers: 25165824, // R24 R25
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "LoweredMoveLoop",
|
||||||
|
auxType: auxInt64,
|
||||||
|
argLen: 3,
|
||||||
|
faultOnNilArg0: true,
|
||||||
|
faultOnNilArg1: true,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 306184191}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R26 R30
|
||||||
|
{1, 306184191}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R26 R30
|
||||||
|
},
|
||||||
|
clobbers: 29360128, // R23 R24 R25
|
||||||
|
clobbersArg0: true,
|
||||||
|
clobbersArg1: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -19688,87 +19688,35 @@ func rewriteValueARM64_OpMove(v *Value) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (Move [s] dst src mem)
|
// match: (Move [s] dst src mem)
|
||||||
// cond: s%16 != 0 && s%16 <= 8 && s > 64
|
// cond: s > 64 && s < 192 && logLargeCopy(v, s)
|
||||||
// result: (Move [8] (OffPtr <dst.Type> dst [s-8]) (OffPtr <src.Type> src [s-8]) (Move [s-s%16] dst src mem))
|
// result: (LoweredMove [s] dst src mem)
|
||||||
for {
|
for {
|
||||||
s := auxIntToInt64(v.AuxInt)
|
s := auxIntToInt64(v.AuxInt)
|
||||||
dst := v_0
|
dst := v_0
|
||||||
src := v_1
|
src := v_1
|
||||||
mem := v_2
|
mem := v_2
|
||||||
if !(s%16 != 0 && s%16 <= 8 && s > 64) {
|
if !(s > 64 && s < 192 && logLargeCopy(v, s)) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
v.reset(OpMove)
|
v.reset(OpARM64LoweredMove)
|
||||||
v.AuxInt = int64ToAuxInt(8)
|
v.AuxInt = int64ToAuxInt(s)
|
||||||
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
|
|
||||||
v0.AuxInt = int64ToAuxInt(s - 8)
|
|
||||||
v0.AddArg(dst)
|
|
||||||
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
|
|
||||||
v1.AuxInt = int64ToAuxInt(s - 8)
|
|
||||||
v1.AddArg(src)
|
|
||||||
v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem)
|
|
||||||
v2.AuxInt = int64ToAuxInt(s - s%16)
|
|
||||||
v2.AddArg3(dst, src, mem)
|
|
||||||
v.AddArg3(v0, v1, v2)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// match: (Move [s] dst src mem)
|
|
||||||
// cond: s%16 != 0 && s%16 > 8 && s > 64
|
|
||||||
// result: (Move [16] (OffPtr <dst.Type> dst [s-16]) (OffPtr <src.Type> src [s-16]) (Move [s-s%16] dst src mem))
|
|
||||||
for {
|
|
||||||
s := auxIntToInt64(v.AuxInt)
|
|
||||||
dst := v_0
|
|
||||||
src := v_1
|
|
||||||
mem := v_2
|
|
||||||
if !(s%16 != 0 && s%16 > 8 && s > 64) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
v.reset(OpMove)
|
|
||||||
v.AuxInt = int64ToAuxInt(16)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
|
|
||||||
v0.AuxInt = int64ToAuxInt(s - 16)
|
|
||||||
v0.AddArg(dst)
|
|
||||||
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
|
|
||||||
v1.AuxInt = int64ToAuxInt(s - 16)
|
|
||||||
v1.AddArg(src)
|
|
||||||
v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem)
|
|
||||||
v2.AuxInt = int64ToAuxInt(s - s%16)
|
|
||||||
v2.AddArg3(dst, src, mem)
|
|
||||||
v.AddArg3(v0, v1, v2)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// match: (Move [s] dst src mem)
|
|
||||||
// cond: s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s)
|
|
||||||
// result: (DUFFCOPY [8 * (64 - s/16)] dst src mem)
|
|
||||||
for {
|
|
||||||
s := auxIntToInt64(v.AuxInt)
|
|
||||||
dst := v_0
|
|
||||||
src := v_1
|
|
||||||
mem := v_2
|
|
||||||
if !(s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s)) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
v.reset(OpARM64DUFFCOPY)
|
|
||||||
v.AuxInt = int64ToAuxInt(8 * (64 - s/16))
|
|
||||||
v.AddArg3(dst, src, mem)
|
v.AddArg3(dst, src, mem)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (Move [s] dst src mem)
|
// match: (Move [s] dst src mem)
|
||||||
// cond: s%16 == 0 && s > 16*64 && logLargeCopy(v, s)
|
// cond: s >= 192 && logLargeCopy(v, s)
|
||||||
// result: (LoweredMove dst src (ADDconst <src.Type> src [s-16]) mem)
|
// result: (LoweredMoveLoop [s] dst src mem)
|
||||||
for {
|
for {
|
||||||
s := auxIntToInt64(v.AuxInt)
|
s := auxIntToInt64(v.AuxInt)
|
||||||
dst := v_0
|
dst := v_0
|
||||||
src := v_1
|
src := v_1
|
||||||
mem := v_2
|
mem := v_2
|
||||||
if !(s%16 == 0 && s > 16*64 && logLargeCopy(v, s)) {
|
if !(s >= 192 && logLargeCopy(v, s)) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
v.reset(OpARM64LoweredMove)
|
v.reset(OpARM64LoweredMoveLoop)
|
||||||
v0 := b.NewValue0(v.Pos, OpARM64ADDconst, src.Type)
|
v.AuxInt = int64ToAuxInt(s)
|
||||||
v0.AuxInt = int64ToAuxInt(s - 16)
|
v.AddArg3(dst, src, mem)
|
||||||
v0.AddArg(src)
|
|
||||||
v.AddArg4(dst, src, v0, mem)
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue