mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: use generated loops instead of DUFFCOPY on riscv64
MemmoveKnownSize112-4 632.1Mi ± 1% 1288.5Mi ± 0% +103.85% (p=0.000 n=10) MemmoveKnownSize128-4 636.1Mi ± 0% 1280.9Mi ± 1% +101.36% (p=0.000 n=10) MemmoveKnownSize192-4 645.3Mi ± 0% 1306.9Mi ± 1% +102.53% (p=0.000 n=10) MemmoveKnownSize248-4 650.2Mi ± 2% 1312.5Mi ± 1% +101.87% (p=0.000 n=10) MemmoveKnownSize256-4 650.7Mi ± 0% 1303.6Mi ± 1% +100.33% (p=0.000 n=10) MemmoveKnownSize512-4 658.2Mi ± 1% 1293.9Mi ± 0% +96.60% (p=0.000 n=10) MemmoveKnownSize1024-4 662.1Mi ± 0% 1312.6Mi ± 0% +98.26% (p=0.000 n=10) Change-Id: I43681ca029880025558b33ddc4295da3947c9b28 Reviewed-on: https://go-review.googlesource.com/c/go/+/700537 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Mark Freeman <markfreeman@google.com>
This commit is contained in:
parent
879ff736d3
commit
4dac9e093f
5 changed files with 186 additions and 222 deletions
|
|
@ -117,6 +117,7 @@ func init() {
|
|||
|
||||
regCtxt := regNamed["X26"]
|
||||
callerSave := gpMask | fpMask | regNamed["g"]
|
||||
r5toR6 := regNamed["X5"] | regNamed["X6"]
|
||||
|
||||
var (
|
||||
gpstore = regInfo{inputs: []regMask{gpspsbMask, gpspMask, 0}} // SB in first input so we can load from a global, but not in second to avoid using SB as a temporary register
|
||||
|
|
@ -354,27 +355,51 @@ func init() {
|
|||
},
|
||||
|
||||
// general unaligned move
|
||||
// arg0 = address of dst memory (in X5, changed as side effect)
|
||||
// arg1 = address of src memory (in X6, changed as side effect)
|
||||
// arg2 = address of the last element of src (can't be X7 as we clobber it before using arg2)
|
||||
// arg3 = mem
|
||||
// auxint = alignment
|
||||
// clobbers X7 as a tmp register.
|
||||
// arg0 = address of dst memory (clobber)
|
||||
// arg1 = address of src memory (clobber)
|
||||
// arg2 = mem
|
||||
// auxint = size and type alignment
|
||||
// returns mem
|
||||
// mov (X6), X7
|
||||
// mov X7, (X5)
|
||||
// ADD $sz, X5
|
||||
// ADD $sz, X6
|
||||
// BGEU Rarg2, X5, -4(PC)
|
||||
// mov (offset)(Rarg1), TMP
|
||||
// mov TMP, (offset)(Rarg0)
|
||||
{
|
||||
name: "LoweredMove",
|
||||
aux: "Int64",
|
||||
argLength: 4,
|
||||
aux: "SymValAndOff",
|
||||
symEffect: "Write",
|
||||
argLength: 3,
|
||||
reg: regInfo{
|
||||
inputs: []regMask{regNamed["X5"], regNamed["X6"], gpMask &^ regNamed["X7"]},
|
||||
clobbers: regNamed["X5"] | regNamed["X6"] | regNamed["X7"],
|
||||
inputs: []regMask{gpMask &^ regNamed["X5"], gpMask &^ regNamed["X5"]},
|
||||
clobbers: regNamed["X5"],
|
||||
},
|
||||
faultOnNilArg0: true,
|
||||
faultOnNilArg1: true,
|
||||
},
|
||||
|
||||
// general unaligned move
|
||||
// arg0 = address of dst memory (clobber)
|
||||
// arg1 = address of src memory (clobber)
|
||||
// arg2 = mem
|
||||
// auxint = size and type alignment
|
||||
// returns mem
|
||||
// ADD $sz, X6
|
||||
//loop:
|
||||
// mov (Rarg1), X5
|
||||
// mov X5, (Rarg0)
|
||||
// ...rest 7 mov...
|
||||
// ADD $sz, Rarg0
|
||||
// ADD $sz, Rarg1
|
||||
// BNE X6, Rarg1, loop
|
||||
{
|
||||
name: "LoweredMoveLoop",
|
||||
aux: "SymValAndOff",
|
||||
argLength: 3,
|
||||
symEffect: "Write",
|
||||
reg: regInfo{
|
||||
inputs: []regMask{gpMask &^ r5toR6, gpMask &^ r5toR6},
|
||||
clobbers: r5toR6,
|
||||
clobbersArg0: true,
|
||||
clobbersArg1: true,
|
||||
},
|
||||
typ: "Mem",
|
||||
faultOnNilArg0: true,
|
||||
faultOnNilArg1: true,
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue