mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: use generated loops instead of DUFFZERO on riscv64
MemclrKnownSize112-4 5.602Gi ± 0% 5.601Gi ± 0% ~ (p=0.363 n=10) MemclrKnownSize128-4 6.933Gi ± 1% 6.545Gi ± 1% -5.59% (p=0.000 n=10) MemclrKnownSize192-4 8.055Gi ± 1% 7.804Gi ± 0% -3.12% (p=0.000 n=10) MemclrKnownSize248-4 8.489Gi ± 0% 8.718Gi ± 0% +2.69% (p=0.000 n=10) MemclrKnownSize256-4 8.762Gi ± 0% 8.763Gi ± 0% ~ (p=0.494 n=10) MemclrKnownSize512-4 9.514Gi ± 1% 9.514Gi ± 0% ~ (p=0.529 n=10) MemclrKnownSize1024-4 9.940Gi ± 0% 9.939Gi ± 1% ~ (p=0.989 n=10) ClearFat3-4 1.300Gi ± 0% 1.301Gi ± 0% ~ (p=0.447 n=10) ClearFat4-4 3.902Gi ± 0% 3.902Gi ± 0% ~ (p=0.971 n=10) ClearFat5-4 665.8Mi ± 0% 1331.5Mi ± 0% +100.01% (p=0.000 n=10) ClearFat6-4 665.8Mi ± 0% 1330.5Mi ± 0% +99.82% (p=0.000 n=10) ClearFat7-4 490.7Mi ± 0% 1331.9Mi ± 0% +171.45% (p=0.000 n=10) ClearFat8-4 5.201Gi ± 0% 5.202Gi ± 0% ~ (p=0.123 n=10) ClearFat9-4 856.1Mi ± 0% 1331.6Mi ± 0% +55.54% (p=0.000 n=10) ClearFat10-4 887.8Mi ± 0% 1331.9Mi ± 0% +50.03% (p=0.000 n=10) ClearFat11-4 915.3Mi ± 0% 1331.1Mi ± 0% +45.42% (p=0.000 n=10) ClearFat12-4 5.202Gi ± 0% 5.202Gi ± 0% ~ (p=0.481 n=10) ClearFat13-4 961.5Mi ± 0% 1331.8Mi ± 0% +38.50% (p=0.000 n=10) ClearFat14-4 981.0Mi ± 0% 1331.8Mi ± 0% +35.76% (p=0.000 n=10) ClearFat15-4 951.3Mi ± 0% 1331.4Mi ± 0% +39.96% (p=0.000 n=10) ClearFat16-4 1.600Gi ± 0% 5.202Gi ± 0% +225.10% (p=0.000 n=10) ClearFat18-4 1.018Gi ± 0% 1.300Gi ± 0% +27.77% (p=0.000 n=10) ClearFat20-4 2.601Gi ± 0% 4.938Gi ± 12% +89.87% (p=0.000 n=10) ClearFat24-4 2.601Gi ± 0% 5.201Gi ± 0% +99.96% (p=0.000 n=10) ClearFat32-4 1.982Gi ± 0% 5.203Gi ± 0% +162.55% (p=0.000 n=10) ClearFat40-4 3.467Gi ± 0% 4.338Gi ± 0% +25.11% (p=0.000 n=10) ClearFat48-4 3.671Gi ± 0% 5.201Gi ± 0% +41.69% (p=0.000 n=10) ClearFat56-4 3.640Gi ± 0% 5.201Gi ± 0% +42.88% (p=0.000 n=10) ClearFat64-4 2.250Gi ± 0% 5.202Gi ± 0% +131.25% (p=0.000 n=10) ClearFat72-4 4.064Gi ± 0% 5.201Gi ± 0% +27.97% (p=0.000 n=10) ClearFat128-4 4.496Gi ± 0% 5.203Gi ± 0% +15.71% (p=0.000 n=10) ClearFat256-4 4.756Gi ± 0% 5.201Gi ± 0% +9.36% (p=0.000 n=10) 
ClearFat512-4 2.512Gi ± 0% 5.201Gi ± 0% +107.03% (p=0.000 n=10) ClearFat1024-4 4.255Gi ± 0% 5.202Gi ± 0% +22.26% (p=0.000 n=10) ClearFat1032-4 4.260Gi ± 0% 5.201Gi ± 0% +22.09% (p=0.000 n=10) ClearFat1040-4 4.285Gi ± 1% 5.203Gi ± 0% +21.41% (p=0.000 n=10) geomean 2.005Gi 3.020Gi +50.58% Change-Id: Iea1da734ff8eaf1b5a2822ae2bdb7f4fd9b65651 Reviewed-on: https://go-review.googlesource.com/c/go/+/699635 Reviewed-by: Mark Ryan <markdryan@rivosinc.com> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Mark Freeman <markfreeman@google.com>
This commit is contained in:
parent
77643dc63f
commit
879ff736d3
5 changed files with 152 additions and 176 deletions
|
|
@ -181,6 +181,8 @@ func largestMove(alignment int64) (obj.As, int64) {
|
|||
}
|
||||
}
|
||||
|
||||
var fracMovOps = []obj.As{riscv.AMOVB, riscv.AMOVH, riscv.AMOVW, riscv.AMOV}
|
||||
|
||||
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
|
||||
// RISC-V has no flags, so this is a no-op.
|
||||
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {}
|
||||
|
|
@ -738,30 +740,86 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.RegTo2 = riscv.REG_ZERO
|
||||
|
||||
case ssa.OpRISCV64LoweredZero:
|
||||
mov, sz := largestMove(v.AuxInt)
|
||||
ptr := v.Args[0].Reg()
|
||||
sc := v.AuxValAndOff()
|
||||
n := sc.Val64()
|
||||
|
||||
// mov ZERO, (Rarg0)
|
||||
// ADD $sz, Rarg0
|
||||
// BGEU Rarg1, Rarg0, -2(PC)
|
||||
mov, sz := largestMove(sc.Off64())
|
||||
|
||||
p := s.Prog(mov)
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = riscv.REG_ZERO
|
||||
p.To.Type = obj.TYPE_MEM
|
||||
p.To.Reg = v.Args[0].Reg()
|
||||
// mov ZERO, (offset)(Rarg0)
|
||||
var off int64
|
||||
for n >= sz {
|
||||
zeroOp(s, mov, ptr, off)
|
||||
off += sz
|
||||
n -= sz
|
||||
}
|
||||
|
||||
for i := len(fracMovOps) - 1; i >= 0; i-- {
|
||||
tsz := int64(1 << i)
|
||||
if n < tsz {
|
||||
continue
|
||||
}
|
||||
zeroOp(s, fracMovOps[i], ptr, off)
|
||||
off += tsz
|
||||
n -= tsz
|
||||
}
|
||||
|
||||
case ssa.OpRISCV64LoweredZeroLoop:
|
||||
ptr := v.Args[0].Reg()
|
||||
sc := v.AuxValAndOff()
|
||||
n := sc.Val64()
|
||||
mov, sz := largestMove(sc.Off64())
|
||||
chunk := 8 * sz
|
||||
|
||||
if n <= 3*chunk {
|
||||
v.Fatalf("ZeroLoop too small:%d, expect:%d", n, 3*chunk)
|
||||
}
|
||||
|
||||
tmp := v.RegTmp()
|
||||
|
||||
p := s.Prog(riscv.AADD)
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.From.Offset = n - n%chunk
|
||||
p.Reg = ptr
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = tmp
|
||||
|
||||
for i := int64(0); i < 8; i++ {
|
||||
zeroOp(s, mov, ptr, sz*i)
|
||||
}
|
||||
|
||||
p2 := s.Prog(riscv.AADD)
|
||||
p2.From.Type = obj.TYPE_CONST
|
||||
p2.From.Offset = sz
|
||||
p2.From.Offset = chunk
|
||||
p2.To.Type = obj.TYPE_REG
|
||||
p2.To.Reg = v.Args[0].Reg()
|
||||
p2.To.Reg = ptr
|
||||
|
||||
p3 := s.Prog(riscv.ABGEU)
|
||||
p3.To.Type = obj.TYPE_BRANCH
|
||||
p3.Reg = v.Args[0].Reg()
|
||||
p3 := s.Prog(riscv.ABNE)
|
||||
p3.From.Reg = tmp
|
||||
p3.From.Type = obj.TYPE_REG
|
||||
p3.From.Reg = v.Args[1].Reg()
|
||||
p3.To.SetTarget(p)
|
||||
p3.Reg = ptr
|
||||
p3.To.Type = obj.TYPE_BRANCH
|
||||
p3.To.SetTarget(p.Link)
|
||||
|
||||
n %= chunk
|
||||
|
||||
// mov ZERO, (offset)(Rarg0)
|
||||
var off int64
|
||||
for n >= sz {
|
||||
zeroOp(s, mov, ptr, off)
|
||||
off += sz
|
||||
n -= sz
|
||||
}
|
||||
|
||||
for i := len(fracMovOps) - 1; i >= 0; i-- {
|
||||
tsz := int64(1 << i)
|
||||
if n < tsz {
|
||||
continue
|
||||
}
|
||||
zeroOp(s, fracMovOps[i], ptr, off)
|
||||
off += tsz
|
||||
n -= tsz
|
||||
}
|
||||
|
||||
case ssa.OpRISCV64LoweredMove:
|
||||
mov, sz := largestMove(v.AuxInt)
|
||||
|
|
@ -955,3 +1013,13 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
|
|||
p.Pos = p.Pos.WithNotStmt()
|
||||
return p
|
||||
}
|
||||
|
||||
func zeroOp(s *ssagen.State, mov obj.As, reg int16, off int64) {
|
||||
p := s.Prog(mov)
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = riscv.REG_ZERO
|
||||
p.To.Type = obj.TYPE_MEM
|
||||
p.To.Reg = reg
|
||||
p.To.Offset = off
|
||||
return
|
||||
}
|
||||
|
|
|
|||
|
|
@ -373,36 +373,14 @@
|
|||
(MOVHstore [4] ptr (MOVDconst [0])
|
||||
(MOVHstore [2] ptr (MOVDconst [0])
|
||||
(MOVHstore ptr (MOVDconst [0]) mem)))
|
||||
(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
|
||||
(MOVWstore [8] ptr (MOVDconst [0])
|
||||
(MOVWstore [4] ptr (MOVDconst [0])
|
||||
(MOVWstore ptr (MOVDconst [0]) mem)))
|
||||
(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVDstore [8] ptr (MOVDconst [0])
|
||||
(MOVDstore ptr (MOVDconst [0]) mem))
|
||||
(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVDstore [16] ptr (MOVDconst [0])
|
||||
(MOVDstore [8] ptr (MOVDconst [0])
|
||||
(MOVDstore ptr (MOVDconst [0]) mem)))
|
||||
(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVDstore [24] ptr (MOVDconst [0])
|
||||
(MOVDstore [16] ptr (MOVDconst [0])
|
||||
(MOVDstore [8] ptr (MOVDconst [0])
|
||||
(MOVDstore ptr (MOVDconst [0]) mem))))
|
||||
|
||||
// Medium 8-aligned zeroing uses a Duff's device
|
||||
// 8 and 128 are magic constants, see runtime/mkduff.go
|
||||
(Zero [s] {t} ptr mem)
|
||||
&& s%8 == 0 && s <= 8*128
|
||||
&& t.Alignment()%8 == 0 =>
|
||||
(DUFFZERO [8 * (128 - s/8)] ptr mem)
|
||||
// Unroll zeroing in medium size (at most 192 bytes i.e. 3 cachelines)
|
||||
(Zero [s] {t} ptr mem) && s <= 24*moveSize(t.Alignment(), config) =>
|
||||
(LoweredZero [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
|
||||
|
||||
// Generic zeroing uses a loop
|
||||
(Zero [s] {t} ptr mem) =>
|
||||
(LoweredZero [t.Alignment()]
|
||||
ptr
|
||||
(ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)]))
|
||||
mem)
|
||||
(Zero [s] {t} ptr mem) && s > 24*moveSize(t.Alignment(), config) =>
|
||||
(LoweredZeroLoop [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
|
||||
|
||||
// Checks
|
||||
(IsNonNil ...) => (SNEZ ...)
|
||||
|
|
|
|||
|
|
@ -317,25 +317,40 @@ func init() {
|
|||
|
||||
// Generic moves and zeros
|
||||
|
||||
// general unaligned zeroing
|
||||
// arg0 = address of memory to zero (in X5, changed as side effect)
|
||||
// arg1 = address of the last element to zero (inclusive)
|
||||
// arg2 = mem
|
||||
// auxint = element size
|
||||
// general unrolled zeroing
|
||||
// arg0 = address of memory to zero
|
||||
// arg1 = mem
|
||||
// auxint = element size and type alignment
|
||||
// returns mem
|
||||
// mov ZERO, (X5)
|
||||
// ADD $sz, X5
|
||||
// BGEU Rarg1, X5, -2(PC)
|
||||
// mov ZERO, (OFFSET)(Rarg0)
|
||||
{
|
||||
name: "LoweredZero",
|
||||
aux: "Int64",
|
||||
argLength: 3,
|
||||
reg: regInfo{
|
||||
inputs: []regMask{regNamed["X5"], gpMask},
|
||||
clobbers: regNamed["X5"],
|
||||
},
|
||||
aux: "SymValAndOff",
|
||||
typ: "Mem",
|
||||
argLength: 2,
|
||||
symEffect: "Write",
|
||||
faultOnNilArg0: true,
|
||||
reg: regInfo{
|
||||
inputs: []regMask{gpMask},
|
||||
},
|
||||
},
|
||||
// general unaligned zeroing
|
||||
// arg0 = address of memory to zero (clobbered)
|
||||
// arg1 = mem
|
||||
// auxint = element size and type alignment
|
||||
// returns mem
|
||||
{
|
||||
name: "LoweredZeroLoop",
|
||||
aux: "SymValAndOff",
|
||||
typ: "Mem",
|
||||
argLength: 2,
|
||||
symEffect: "Write",
|
||||
needIntTemp: true,
|
||||
faultOnNilArg0: true,
|
||||
reg: regInfo{
|
||||
inputs: []regMask{gpMask},
|
||||
clobbersArg0: true,
|
||||
},
|
||||
},
|
||||
|
||||
// general unaligned move
|
||||
|
|
|
|||
|
|
@ -2569,6 +2569,7 @@ const (
|
|||
OpRISCV64DUFFZERO
|
||||
OpRISCV64DUFFCOPY
|
||||
OpRISCV64LoweredZero
|
||||
OpRISCV64LoweredZeroLoop
|
||||
OpRISCV64LoweredMove
|
||||
OpRISCV64LoweredAtomicLoad8
|
||||
OpRISCV64LoweredAtomicLoad32
|
||||
|
|
@ -34558,15 +34559,28 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
{
|
||||
name: "LoweredZero",
|
||||
auxType: auxInt64,
|
||||
argLen: 3,
|
||||
auxType: auxSymValAndOff,
|
||||
argLen: 2,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymWrite,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 16}, // X5
|
||||
{1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
|
||||
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
|
||||
},
|
||||
clobbers: 16, // X5
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredZeroLoop",
|
||||
auxType: auxSymValAndOff,
|
||||
argLen: 2,
|
||||
needIntTemp: true,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymWrite,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
|
||||
},
|
||||
clobbersArg0: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -9925,138 +9925,39 @@ func rewriteValueRISCV64_OpZero(v *Value) bool {
|
|||
v.AddArg3(ptr, v0, v1)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [12] {t} ptr mem)
|
||||
// cond: t.Alignment()%4 == 0
|
||||
// result: (MOVWstore [8] ptr (MOVDconst [0]) (MOVWstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)))
|
||||
for {
|
||||
if auxIntToInt64(v.AuxInt) != 12 {
|
||||
break
|
||||
}
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(t.Alignment()%4 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpRISCV64MOVWstore)
|
||||
v.AuxInt = int32ToAuxInt(8)
|
||||
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
||||
v0.AuxInt = int64ToAuxInt(0)
|
||||
v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
|
||||
v1.AuxInt = int32ToAuxInt(4)
|
||||
v2 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
|
||||
v2.AddArg3(ptr, v0, mem)
|
||||
v1.AddArg3(ptr, v0, v2)
|
||||
v.AddArg3(ptr, v0, v1)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [16] {t} ptr mem)
|
||||
// cond: t.Alignment()%8 == 0
|
||||
// result: (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
|
||||
for {
|
||||
if auxIntToInt64(v.AuxInt) != 16 {
|
||||
break
|
||||
}
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(t.Alignment()%8 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpRISCV64MOVDstore)
|
||||
v.AuxInt = int32ToAuxInt(8)
|
||||
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
||||
v0.AuxInt = int64ToAuxInt(0)
|
||||
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
|
||||
v1.AddArg3(ptr, v0, mem)
|
||||
v.AddArg3(ptr, v0, v1)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [24] {t} ptr mem)
|
||||
// cond: t.Alignment()%8 == 0
|
||||
// result: (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
|
||||
for {
|
||||
if auxIntToInt64(v.AuxInt) != 24 {
|
||||
break
|
||||
}
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(t.Alignment()%8 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpRISCV64MOVDstore)
|
||||
v.AuxInt = int32ToAuxInt(16)
|
||||
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
||||
v0.AuxInt = int64ToAuxInt(0)
|
||||
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
|
||||
v1.AuxInt = int32ToAuxInt(8)
|
||||
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
|
||||
v2.AddArg3(ptr, v0, mem)
|
||||
v1.AddArg3(ptr, v0, v2)
|
||||
v.AddArg3(ptr, v0, v1)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [32] {t} ptr mem)
|
||||
// cond: t.Alignment()%8 == 0
|
||||
// result: (MOVDstore [24] ptr (MOVDconst [0]) (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))))
|
||||
for {
|
||||
if auxIntToInt64(v.AuxInt) != 32 {
|
||||
break
|
||||
}
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(t.Alignment()%8 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpRISCV64MOVDstore)
|
||||
v.AuxInt = int32ToAuxInt(24)
|
||||
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
||||
v0.AuxInt = int64ToAuxInt(0)
|
||||
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
|
||||
v1.AuxInt = int32ToAuxInt(16)
|
||||
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
|
||||
v2.AuxInt = int32ToAuxInt(8)
|
||||
v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
|
||||
v3.AddArg3(ptr, v0, mem)
|
||||
v2.AddArg3(ptr, v0, v3)
|
||||
v1.AddArg3(ptr, v0, v2)
|
||||
v.AddArg3(ptr, v0, v1)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [s] {t} ptr mem)
|
||||
// cond: s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0
|
||||
// result: (DUFFZERO [8 * (128 - s/8)] ptr mem)
|
||||
// cond: s <= 24*moveSize(t.Alignment(), config)
|
||||
// result: (LoweredZero [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
|
||||
for {
|
||||
s := auxIntToInt64(v.AuxInt)
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0) {
|
||||
if !(s <= 24*moveSize(t.Alignment(), config)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpRISCV64DUFFZERO)
|
||||
v.AuxInt = int64ToAuxInt(8 * (128 - s/8))
|
||||
v.reset(OpRISCV64LoweredZero)
|
||||
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [s] {t} ptr mem)
|
||||
// result: (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem)
|
||||
// cond: s > 24*moveSize(t.Alignment(), config)
|
||||
// result: (LoweredZeroLoop [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
|
||||
for {
|
||||
s := auxIntToInt64(v.AuxInt)
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
v.reset(OpRISCV64LoweredZero)
|
||||
v.AuxInt = int64ToAuxInt(t.Alignment())
|
||||
v0 := b.NewValue0(v.Pos, OpRISCV64ADD, ptr.Type)
|
||||
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
||||
v1.AuxInt = int64ToAuxInt(s - moveSize(t.Alignment(), config))
|
||||
v0.AddArg2(ptr, v1)
|
||||
v.AddArg3(ptr, v0, mem)
|
||||
if !(s > 24*moveSize(t.Alignment(), config)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpRISCV64LoweredZeroLoop)
|
||||
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteBlockRISCV64(b *Block) bool {
|
||||
typ := &b.Func.Config.Types
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue