mirror of
https://github.com/golang/go.git
synced 2026-02-07 02:09:55 +00:00
cmd/compile: use generated loops instead of DUFFZERO on loong64
Change-Id: Id43ee4353d4bac96627f8b0f54545cdd3d2a1d1b Reviewed-on: https://go-review.googlesource.com/c/go/+/699695 Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Carlos Amedee <carlos@golang.org> Reviewed-by: abner chenc <chenguoqi@loongson.cn>
This commit is contained in:
parent
882335e2cb
commit
7bba745820
5 changed files with 154 additions and 90 deletions
|
|
@ -560,28 +560,97 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.To.Sym = ir.Syms.Duffzero
|
||||
p.To.Offset = v.AuxInt
|
||||
case ssa.OpLOONG64LoweredZero:
|
||||
// MOVx R0, (Rarg0)
|
||||
// ADDV $sz, Rarg0
|
||||
// BGEU Rarg1, Rarg0, -2(PC)
|
||||
mov, sz := largestMove(v.AuxInt)
|
||||
p := s.Prog(mov)
|
||||
ptrReg := v.Args[0].Reg()
|
||||
n := v.AuxInt
|
||||
if n < 16 {
|
||||
v.Fatalf("Zero too small %d", n)
|
||||
}
|
||||
|
||||
// Generate Zeroing instructions.
|
||||
var off int64
|
||||
for n >= 8 {
|
||||
// MOVV ZR, off(ptrReg)
|
||||
zero8(s, ptrReg, off)
|
||||
off += 8
|
||||
n -= 8
|
||||
}
|
||||
if n != 0 {
|
||||
// MOVV ZR, off+n-8(ptrReg)
|
||||
zero8(s, ptrReg, off+n-8)
|
||||
}
|
||||
case ssa.OpLOONG64LoweredZeroLoop:
|
||||
ptrReg := v.Args[0].Reg()
|
||||
countReg := v.RegTmp()
|
||||
var off int64
|
||||
n := v.AuxInt
|
||||
loopSize := int64(64)
|
||||
if n < 3*loopSize {
|
||||
// - a loop count of 0 won't work.
|
||||
// - a loop count of 1 is useless.
|
||||
// - a loop count of 2 is a code size ~tie
|
||||
// 4 instructions to implement the loop
|
||||
// 8 instructions in the loop body
|
||||
// vs
|
||||
// 16 instructions in the straightline code
|
||||
// Might as well use straightline code.
|
||||
v.Fatalf("ZeroLoop size tool small %d", n)
|
||||
}
|
||||
|
||||
// Put iteration count in a register.
|
||||
// MOVV $n/loopSize, countReg
|
||||
p := s.Prog(loong64.AMOVV)
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.From.Offset = n / loopSize
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = countReg
|
||||
cntInit := p
|
||||
|
||||
// Zero loopSize bytes starting at ptrReg.
|
||||
for range loopSize / 8 {
|
||||
// MOVV ZR, off(ptrReg)
|
||||
zero8(s, ptrReg, off)
|
||||
off += 8
|
||||
}
|
||||
|
||||
// Increment ptrReg by loopSize.
|
||||
// ADDV $loopSize, ptrReg
|
||||
p = s.Prog(loong64.AADDV)
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.From.Offset = loopSize
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = ptrReg
|
||||
|
||||
// Decrement loop count.
|
||||
// SUBV $1, countReg
|
||||
p = s.Prog(loong64.ASUBV)
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.From.Offset = 1
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = countReg
|
||||
|
||||
// Jump to loop header if we're not done yet.
|
||||
// BNE countReg, loop header
|
||||
p = s.Prog(loong64.ABNE)
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = loong64.REGZERO
|
||||
p.To.Type = obj.TYPE_MEM
|
||||
p.To.Reg = v.Args[0].Reg()
|
||||
p.From.Reg = countReg
|
||||
p.To.Type = obj.TYPE_BRANCH
|
||||
p.To.SetTarget(cntInit.Link)
|
||||
|
||||
p2 := s.Prog(loong64.AADDVU)
|
||||
p2.From.Type = obj.TYPE_CONST
|
||||
p2.From.Offset = sz
|
||||
p2.To.Type = obj.TYPE_REG
|
||||
p2.To.Reg = v.Args[0].Reg()
|
||||
// Multiples of the loop size are now done.
|
||||
n %= loopSize
|
||||
|
||||
p3 := s.Prog(loong64.ABGEU)
|
||||
p3.From.Type = obj.TYPE_REG
|
||||
p3.From.Reg = v.Args[1].Reg()
|
||||
p3.Reg = v.Args[0].Reg()
|
||||
p3.To.Type = obj.TYPE_BRANCH
|
||||
p3.To.SetTarget(p)
|
||||
off = 0
|
||||
// Write any fractional portion.
|
||||
for n >= 8 {
|
||||
// MOVV ZR, off(ptrReg)
|
||||
zero8(s, ptrReg, off)
|
||||
off += 8
|
||||
n -= 8
|
||||
}
|
||||
|
||||
if n != 0 {
|
||||
zero8(s, ptrReg, off+n-8)
|
||||
}
|
||||
|
||||
case ssa.OpLOONG64DUFFCOPY:
|
||||
p := s.Prog(obj.ADUFFCOPY)
|
||||
|
|
@ -1155,3 +1224,14 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
|
|||
p.Pos = p.Pos.WithNotStmt()
|
||||
return p
|
||||
}
|
||||
|
||||
// zero8 zeroes 8 bytes at reg+off.
//
// It creates one MOVV instruction through s.Prog whose source operand is the
// register loong64.REGZERO and whose destination operand is the memory
// location addressed by base register reg plus byte offset off.
// The function returns nothing; the created instruction is not handed back
// to the caller.
|
||||
func zero8(s *ssagen.State, reg int16, off int64) {
|
||||
// MOVV ZR, off(reg)
|
||||
p := s.Prog(loong64.AMOVV)
|
||||
// Source operand: the zero register.
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = loong64.REGZERO
|
||||
// Destination operand: memory at off(reg).
p.To.Type = obj.TYPE_MEM
|
||||
p.To.Reg = reg
|
||||
p.To.Offset = off
|
||||
}
|
||||
|
|
|
|||
|
|
@ -373,24 +373,8 @@
|
|||
(MOVVstore [8] ptr (MOVVconst [0])
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
|
||||
// strip off fractional word zeroing
|
||||
(Zero [s] ptr mem) && s%8 != 0 && s > 16 =>
|
||||
(Zero [s%8]
|
||||
(OffPtr <ptr.Type> ptr [s-s%8])
|
||||
(Zero [s-s%8] ptr mem))
|
||||
|
||||
// medium zeroing uses a duff device
|
||||
(Zero [s] ptr mem)
|
||||
&& s%8 == 0 && s > 16 && s <= 8*128 =>
|
||||
(DUFFZERO [8 * (128 - s/8)] ptr mem)
|
||||
|
||||
// large zeroing uses a loop
|
||||
(Zero [s] ptr mem)
|
||||
&& s%8 == 0 && s > 8*128 =>
|
||||
(LoweredZero
|
||||
ptr
|
||||
(ADDVconst <ptr.Type> ptr [s-8])
|
||||
mem)
|
||||
(Zero [s] ptr mem) && s > 16 && s < 192 => (LoweredZero [s] ptr mem)
|
||||
(Zero [s] ptr mem) && s >= 192 => (LoweredZeroLoop [s] ptr mem)
|
||||
|
||||
// moves
|
||||
(Move [0] _ _ mem) => mem
|
||||
|
|
|
|||
|
|
@ -376,6 +376,21 @@ func init() {
|
|||
faultOnNilArg0: true,
|
||||
},
|
||||
|
||||
// medium zeroing
|
||||
// arg0 = address of memory to zero
|
||||
// arg1 = mem
|
||||
// auxint = number of bytes to zero
|
||||
// returns mem
|
||||
{
|
||||
name: "LoweredZero",
|
||||
aux: "Int64",
|
||||
argLength: 2,
|
||||
reg: regInfo{
|
||||
inputs: []regMask{gp},
|
||||
},
|
||||
faultOnNilArg0: true,
|
||||
},
|
||||
|
||||
// duffcopy
|
||||
// arg0 = address of dst memory (in R21, changed as side effect)
|
||||
// arg1 = address of src memory (in R20, changed as side effect)
|
||||
|
|
@ -395,25 +410,21 @@ func init() {
|
|||
faultOnNilArg1: true,
|
||||
},
|
||||
|
||||
// large or unaligned zeroing
|
||||
// arg0 = address of memory to zero (in R20, changed as side effect)
|
||||
// arg1 = address of the last element to zero
|
||||
// arg2 = mem
|
||||
// auxint = alignment
|
||||
// large zeroing
|
||||
// arg0 = address of memory to zero
|
||||
// arg1 = mem
|
||||
// auxint = number of bytes to zero
|
||||
// returns mem
|
||||
// MOVx R0, (R20)
|
||||
// ADDV $sz, R20
|
||||
// BGEU Rarg1, R20, -2(PC)
|
||||
{
|
||||
name: "LoweredZero",
|
||||
name: "LoweredZeroLoop",
|
||||
aux: "Int64",
|
||||
argLength: 3,
|
||||
argLength: 2,
|
||||
reg: regInfo{
|
||||
inputs: []regMask{buildReg("R20"), gp},
|
||||
clobbers: buildReg("R20"),
|
||||
inputs: []regMask{gp},
|
||||
clobbersArg0: true,
|
||||
},
|
||||
typ: "Mem",
|
||||
faultOnNilArg0: true,
|
||||
needIntTemp: true,
|
||||
},
|
||||
|
||||
// large or unaligned move
|
||||
|
|
|
|||
|
|
@ -1923,8 +1923,9 @@ const (
|
|||
OpLOONG64CALLclosure
|
||||
OpLOONG64CALLinter
|
||||
OpLOONG64DUFFZERO
|
||||
OpLOONG64DUFFCOPY
|
||||
OpLOONG64LoweredZero
|
||||
OpLOONG64DUFFCOPY
|
||||
OpLOONG64LoweredZeroLoop
|
||||
OpLOONG64LoweredMove
|
||||
OpLOONG64LoweredAtomicLoad8
|
||||
OpLOONG64LoweredAtomicLoad32
|
||||
|
|
@ -25912,6 +25913,17 @@ var opcodeTable = [...]opInfo{
|
|||
clobbers: 524290, // R1 R20
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredZero",
|
||||
auxType: auxInt64,
|
||||
argLen: 2,
|
||||
faultOnNilArg0: true,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "DUFFCOPY",
|
||||
auxType: auxInt64,
|
||||
|
|
@ -25927,16 +25939,16 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredZero",
|
||||
name: "LoweredZeroLoop",
|
||||
auxType: auxInt64,
|
||||
argLen: 3,
|
||||
argLen: 2,
|
||||
needIntTemp: true,
|
||||
faultOnNilArg0: true,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 524288}, // R20
|
||||
{1, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
clobbers: 524288, // R20
|
||||
clobbersArg0: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -11497,56 +11497,33 @@ func rewriteValueLOONG64_OpZero(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
// match: (Zero [s] ptr mem)
|
||||
// cond: s%8 != 0 && s > 16
|
||||
// result: (Zero [s%8] (OffPtr <ptr.Type> ptr [s-s%8]) (Zero [s-s%8] ptr mem))
|
||||
// cond: s > 16 && s < 192
|
||||
// result: (LoweredZero [s] ptr mem)
|
||||
for {
|
||||
s := auxIntToInt64(v.AuxInt)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(s%8 != 0 && s > 16) {
|
||||
if !(s > 16 && s < 192) {
|
||||
break
|
||||
}
|
||||
v.reset(OpZero)
|
||||
v.AuxInt = int64ToAuxInt(s % 8)
|
||||
v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
|
||||
v0.AuxInt = int64ToAuxInt(s - s%8)
|
||||
v0.AddArg(ptr)
|
||||
v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
|
||||
v1.AuxInt = int64ToAuxInt(s - s%8)
|
||||
v1.AddArg2(ptr, mem)
|
||||
v.AddArg2(v0, v1)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [s] ptr mem)
|
||||
// cond: s%8 == 0 && s > 16 && s <= 8*128
|
||||
// result: (DUFFZERO [8 * (128 - s/8)] ptr mem)
|
||||
for {
|
||||
s := auxIntToInt64(v.AuxInt)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(s%8 == 0 && s > 16 && s <= 8*128) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64DUFFZERO)
|
||||
v.AuxInt = int64ToAuxInt(8 * (128 - s/8))
|
||||
v.reset(OpLOONG64LoweredZero)
|
||||
v.AuxInt = int64ToAuxInt(s)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [s] ptr mem)
|
||||
// cond: s%8 == 0 && s > 8*128
|
||||
// result: (LoweredZero ptr (ADDVconst <ptr.Type> ptr [s-8]) mem)
|
||||
// cond: s >= 192
|
||||
// result: (LoweredZeroLoop [s] ptr mem)
|
||||
for {
|
||||
s := auxIntToInt64(v.AuxInt)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(s%8 == 0 && s > 8*128) {
|
||||
if !(s >= 192) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64LoweredZero)
|
||||
v0 := b.NewValue0(v.Pos, OpLOONG64ADDVconst, ptr.Type)
|
||||
v0.AuxInt = int64ToAuxInt(s - 8)
|
||||
v0.AddArg(ptr)
|
||||
v.AddArg3(ptr, v0, mem)
|
||||
v.reset(OpLOONG64LoweredZeroLoop)
|
||||
v.AuxInt = int64ToAuxInt(s)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue