cmd/compile: optimise float <-> int register moves on riscv64

Use the FMV* instructions to move values between the floating point and
integer register files.

Note: I'm unsure why there is a slowdown in the Float32bits benchmark,
I've checked and an FMVXS instruction is being used as expected. There
are multiple loads and other instructions in the main loop.

goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
                    │ fmv-before.txt │            fmv-after.txt            │
                    │     sec/op     │   sec/op     vs base                │
Acos                     122.7n ± 0%   122.7n ± 0%        ~ (p=1.000 n=10)
Acosh                    197.2n ± 0%   191.5n ± 0%   -2.89% (p=0.000 n=10)
Asin                     122.7n ± 0%   122.7n ± 0%        ~ (p=0.474 n=10)
Asinh                    231.0n ± 0%   224.1n ± 0%   -2.99% (p=0.000 n=10)
Atan                     91.39n ± 0%   91.41n ± 0%        ~ (p=0.465 n=10)
Atanh                    210.3n ± 0%   203.4n ± 0%   -3.26% (p=0.000 n=10)
Atan2                    149.6n ± 0%   149.6n ± 0%        ~ (p=0.721 n=10)
Cbrt                     176.5n ± 0%   165.9n ± 0%   -6.01% (p=0.000 n=10)
Ceil                     25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Copysign                 3.756n ± 0%   3.756n ± 0%        ~ (p=0.149 n=10)
Cos                      95.15n ± 0%   95.15n ± 0%        ~ (p=0.374 n=10)
Cosh                     228.6n ± 0%   224.7n ± 0%   -1.71% (p=0.000 n=10)
Erf                      115.2n ± 0%   115.2n ± 0%        ~ (p=0.474 n=10)
Erfc                     116.4n ± 0%   116.4n ± 0%        ~ (p=0.628 n=10)
Erfinv                   133.3n ± 0%   133.3n ± 0%        ~ (p=1.000 n=10)
Erfcinv                  133.3n ± 0%   133.3n ± 0%        ~ (p=1.000 n=10)
Exp                      194.1n ± 0%   190.3n ± 0%   -1.93% (p=0.000 n=10)
ExpGo                    204.7n ± 0%   200.3n ± 0%   -2.15% (p=0.000 n=10)
Expm1                    137.7n ± 0%   135.2n ± 0%   -1.82% (p=0.000 n=10)
Exp2                     173.4n ± 0%   169.0n ± 0%   -2.54% (p=0.000 n=10)
Exp2Go                   182.8n ± 0%   178.4n ± 0%   -2.41% (p=0.000 n=10)
Abs                      3.756n ± 0%   3.756n ± 0%        ~ (p=0.157 n=10)
Dim                      12.52n ± 0%   12.52n ± 0%        ~ (p=0.737 n=10)
Floor                    25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Max                      21.29n ± 0%   20.03n ± 0%   -5.92% (p=0.000 n=10)
Min                      21.28n ± 0%   20.04n ± 0%   -5.85% (p=0.000 n=10)
Mod                      344.9n ± 0%   319.2n ± 0%   -7.45% (p=0.000 n=10)
Frexp                    55.71n ± 0%   48.85n ± 0%  -12.30% (p=0.000 n=10)
Gamma                    165.9n ± 0%   167.8n ± 0%   +1.15% (p=0.000 n=10)
Hypot                    73.24n ± 0%   70.74n ± 0%   -3.41% (p=0.000 n=10)
HypotGo                  84.50n ± 0%   82.63n ± 0%   -2.21% (p=0.000 n=10)
Ilogb                    49.45n ± 0%   45.70n ± 0%   -7.59% (p=0.000 n=10)
J0                       556.5n ± 0%   544.0n ± 0%   -2.25% (p=0.000 n=10)
J1                       555.3n ± 0%   542.8n ± 0%   -2.24% (p=0.000 n=10)
Jn                       1.181µ ± 0%   1.156µ ± 0%   -2.12% (p=0.000 n=10)
Ldexp                    59.47n ± 0%   53.84n ± 0%   -9.47% (p=0.000 n=10)
Lgamma                   167.2n ± 0%   154.6n ± 0%   -7.51% (p=0.000 n=10)
Log                      160.9n ± 0%   154.6n ± 0%   -3.92% (p=0.000 n=10)
Logb                     49.45n ± 0%   45.70n ± 0%   -7.58% (p=0.000 n=10)
Log1p                    147.1n ± 0%   137.1n ± 0%   -6.80% (p=0.000 n=10)
Log10                    162.1n ± 1%   154.6n ± 0%   -4.63% (p=0.000 n=10)
Log2                     66.99n ± 0%   60.72n ± 0%   -9.36% (p=0.000 n=10)
Modf                     29.42n ± 0%   26.29n ± 0%  -10.64% (p=0.000 n=10)
Nextafter32              41.95n ± 0%   37.88n ± 0%   -9.70% (p=0.000 n=10)
Nextafter64              38.82n ± 0%   33.49n ± 0%  -13.73% (p=0.000 n=10)
PowInt                   252.3n ± 0%   237.3n ± 0%   -5.95% (p=0.000 n=10)
PowFrac                  615.5n ± 0%   589.7n ± 0%   -4.19% (p=0.000 n=10)
Pow10Pos                 10.64n ± 0%   10.64n ± 0%        ~ (p=1.000 n=10)
Pow10Neg                 24.42n ± 0%   15.02n ± 0%  -38.49% (p=0.000 n=10)
Round                    21.91n ± 0%   18.16n ± 0%  -17.12% (p=0.000 n=10)
RoundToEven              24.42n ± 0%   21.29n ± 0%  -12.84% (p=0.000 n=10)
Remainder                308.0n ± 0%   291.2n ± 0%   -5.44% (p=0.000 n=10)
Signbit                  10.02n ± 0%   10.02n ± 0%        ~ (p=1.000 n=10)
Sin                      102.7n ± 0%   102.7n ± 0%        ~ (p=0.211 n=10)
Sincos                   124.0n ± 1%   123.3n ± 0%   -0.56% (p=0.002 n=10)
Sinh                     239.1n ± 0%   234.7n ± 0%   -1.84% (p=0.000 n=10)
SqrtIndirect             2.504n ± 0%   2.504n ± 0%        ~ (p=0.303 n=10)
SqrtLatency              15.03n ± 0%   15.02n ± 0%        ~ (p=0.598 n=10)
SqrtIndirectLatency      15.02n ± 0%   15.02n ± 0%        ~ (p=0.907 n=10)
SqrtGoLatency            165.3n ± 0%   157.2n ± 0%   -4.90% (p=0.000 n=10)
SqrtPrime                3.801µ ± 0%   3.802µ ± 0%        ~ (p=1.000 n=10)
Tan                      125.2n ± 0%   125.2n ± 0%        ~ (p=0.458 n=10)
Tanh                     244.2n ± 0%   239.9n ± 0%   -1.76% (p=0.000 n=10)
Trunc                    25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Y0                       550.2n ± 0%   538.1n ± 0%   -2.21% (p=0.000 n=10)
Y1                       552.8n ± 0%   540.6n ± 0%   -2.21% (p=0.000 n=10)
Yn                       1.168µ ± 0%   1.143µ ± 0%   -2.14% (p=0.000 n=10)
Float64bits              8.139n ± 0%   4.385n ± 0%  -46.13% (p=0.000 n=10)
Float64frombits          7.512n ± 0%   3.759n ± 0%  -49.96% (p=0.000 n=10)
Float32bits              8.138n ± 0%   9.393n ± 0%  +15.42% (p=0.000 n=10)
Float32frombits          7.513n ± 0%   3.757n ± 0%  -49.98% (p=0.000 n=10)
FMA                      3.756n ± 0%   3.756n ± 0%        ~ (p=0.246 n=10)
geomean                  77.43n        72.42n        -6.47%

Change-Id: I8dac69b1d17cb3d2af78d1c844d2b5d80000d667
Reviewed-on: https://go-review.googlesource.com/c/go/+/599235
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Michael Munday <mikemndy@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Michael Munday 2024-07-17 23:54:43 +01:00 committed by Gopher Robot
parent 7a1679d7ae
commit fcc036f03b
6 changed files with 372 additions and 3 deletions

View file

@ -417,7 +417,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = r p.To.Reg = r
case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FABSD, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD, case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FABSD, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD,
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVXS, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVXD,
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW, ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW,

View file

@ -299,6 +299,11 @@
(base.Op != OpSB || !config.ctxt.Flag_dynlink) => (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOV(B|BU|H|HU|W|WU|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOV(B|BU|H|HU|W|WU|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(FMOV(W|D)load [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) &&
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
(base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOV(W|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(B|H|W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && (MOV(B|H|W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) &&
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
(base.Op != OpSB || !config.ctxt.Flag_dynlink) => (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
@ -309,15 +314,26 @@
(base.Op != OpSB || !config.ctxt.Flag_dynlink) => (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOV(B|H|W|D)storezero [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOV(B|H|W|D)storezero [off1+off2] {mergeSym(sym1,sym2)} base mem)
(FMOV(W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) &&
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
(base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOV(W|D)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(B|BU|H|HU|W|WU|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOV(B|BU|H|HU|W|WU|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
(MOV(B|BU|H|HU|W|WU|D)load [off1+int32(off2)] {sym} base mem) (MOV(B|BU|H|HU|W|WU|D)load [off1+int32(off2)] {sym} base mem)
(FMOV(W|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
(FMOV(W|D)load [off1+int32(off2)] {sym} base mem)
(MOV(B|H|W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => (MOV(B|H|W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
(MOV(B|H|W|D)store [off1+int32(off2)] {sym} base val mem) (MOV(B|H|W|D)store [off1+int32(off2)] {sym} base val mem)
(MOV(B|H|W|D)storezero [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOV(B|H|W|D)storezero [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
(MOV(B|H|W|D)storezero [off1+int32(off2)] {sym} base mem) (MOV(B|H|W|D)storezero [off1+int32(off2)] {sym} base mem)
(FMOV(W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
(FMOV(W|D)store [off1+int32(off2)] {sym} base val mem)
// Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis // Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis
// with OffPtr -> ADDI. // with OffPtr -> ADDI.
(ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x) (ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x)
@ -701,6 +717,13 @@
(MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem) (MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem)
(MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem) (MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem)
// Replace load from same location as preceding store with copy.
(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXD x)
(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVDX x)
(MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXS x)
(MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVWUreg (FMVXS x))
(FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVSX x)
// If a register move has only 1 use, just use the same register without emitting instruction // If a register move has only 1 use, just use the same register without emitting instruction
// MOVnop does not emit an instruction, only for ensuring the type. // MOVnop does not emit an instruction, only for ensuring the type.
(MOVDreg x) && x.Uses == 1 => (MOVDnop x) (MOVDreg x) && x.Uses == 1 => (MOVDnop x)

View file

@ -453,7 +453,8 @@ func init() {
{name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS", commutative: true, typ: "Float32"}, // -(arg0 * arg1) - arg2 {name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS", commutative: true, typ: "Float32"}, // -(arg0 * arg1) - arg2
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"}, // sqrt(arg0) {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"}, // sqrt(arg0)
{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"}, // -arg0 {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"}, // -arg0
{name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float {name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float32
{name: "FMVXS", argLength: 1, reg: fpgp, asm: "FMVXS", typ: "Int32"}, // reinterpret arg0 as int32, sign extended to 64 bits
{name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"}, // float32(low 32 bits of arg0) {name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"}, // float32(low 32 bits of arg0)
{name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"}, // float32(arg0) {name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"}, // float32(arg0)
{name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"}, // int32(arg0) {name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"}, // int32(arg0)
@ -480,7 +481,8 @@ func init() {
{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD", typ: "Float64"}, // -arg0 {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD", typ: "Float64"}, // -arg0
{name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD", typ: "Float64"}, // abs(arg0) {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD", typ: "Float64"}, // abs(arg0)
{name: "FSGNJD", argLength: 2, reg: fp21, asm: "FSGNJD", typ: "Float64"}, // copy sign of arg1 to arg0 {name: "FSGNJD", argLength: 2, reg: fp21, asm: "FSGNJD", typ: "Float64"}, // copy sign of arg1 to arg0
{name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float {name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float64
{name: "FMVXD", argLength: 1, reg: fpgp, asm: "FMVXD", typ: "Int64"}, // reinterpret arg0 as int64
{name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"}, // float64(low 32 bits of arg0) {name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"}, // float64(low 32 bits of arg0)
{name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"}, // float64(arg0) {name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"}, // float64(arg0)
{name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"}, // int32(arg0) {name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"}, // int32(arg0)

View file

@ -2600,6 +2600,7 @@ const (
OpRISCV64FSQRTS OpRISCV64FSQRTS
OpRISCV64FNEGS OpRISCV64FNEGS
OpRISCV64FMVSX OpRISCV64FMVSX
OpRISCV64FMVXS
OpRISCV64FCVTSW OpRISCV64FCVTSW
OpRISCV64FCVTSL OpRISCV64FCVTSL
OpRISCV64FCVTWS OpRISCV64FCVTWS
@ -2625,6 +2626,7 @@ const (
OpRISCV64FABSD OpRISCV64FABSD
OpRISCV64FSGNJD OpRISCV64FSGNJD
OpRISCV64FMVDX OpRISCV64FMVDX
OpRISCV64FMVXD
OpRISCV64FCVTDW OpRISCV64FCVTDW
OpRISCV64FCVTDL OpRISCV64FCVTDL
OpRISCV64FCVTWD OpRISCV64FCVTWD
@ -34985,6 +34987,19 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "FMVXS",
argLen: 1,
asm: riscv.AFMVXS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{ {
name: "FCVTSW", name: "FCVTSW",
argLen: 1, argLen: 1,
@ -35345,6 +35360,19 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "FMVXD",
argLen: 1,
asm: riscv.AFMVXD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{ {
name: "FCVTDW", name: "FCVTDW",
argLen: 1, argLen: 1,

View file

@ -517,6 +517,14 @@ func rewriteValueRISCV64(v *Value) bool {
return rewriteValueRISCV64_OpRISCV64FMADDD(v) return rewriteValueRISCV64_OpRISCV64FMADDD(v)
case OpRISCV64FMADDS: case OpRISCV64FMADDS:
return rewriteValueRISCV64_OpRISCV64FMADDS(v) return rewriteValueRISCV64_OpRISCV64FMADDS(v)
case OpRISCV64FMOVDload:
return rewriteValueRISCV64_OpRISCV64FMOVDload(v)
case OpRISCV64FMOVDstore:
return rewriteValueRISCV64_OpRISCV64FMOVDstore(v)
case OpRISCV64FMOVWload:
return rewriteValueRISCV64_OpRISCV64FMOVWload(v)
case OpRISCV64FMOVWstore:
return rewriteValueRISCV64_OpRISCV64FMOVWstore(v)
case OpRISCV64FMSUBD: case OpRISCV64FMSUBD:
return rewriteValueRISCV64_OpRISCV64FMSUBD(v) return rewriteValueRISCV64_OpRISCV64FMSUBD(v)
case OpRISCV64FMSUBS: case OpRISCV64FMSUBS:
@ -3844,6 +3852,250 @@ func rewriteValueRISCV64_OpRISCV64FMADDS(v *Value) bool {
} }
return false return false
} }
func rewriteValueRISCV64_OpRISCV64FMOVDload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVDload)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg2(base, mem)
return true
}
// match: (FMOVDload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVDload [off1+int32(off2)] {sym} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVDload)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg2(base, mem)
return true
}
// match: (FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVDX x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64MOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVDX)
v.AddArg(x)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMOVDstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVDstore)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg3(base, val, mem)
return true
}
// match: (FMOVDstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVDstore [off1+int32(off2)] {sym} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVDstore)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg3(base, val, mem)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMOVWload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVWload)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg2(base, mem)
return true
}
// match: (FMOVWload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVWload [off1+int32(off2)] {sym} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVWload)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg2(base, mem)
return true
}
// match: (FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVSX x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVSX)
v.AddArg(x)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMOVWstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVWstore)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg3(base, val, mem)
return true
}
// match: (FMOVWstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVWstore [off1+int32(off2)] {sym} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVWstore)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg3(base, val, mem)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMSUBD(v *Value) bool { func rewriteValueRISCV64_OpRISCV64FMSUBD(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
@ -4977,6 +5229,25 @@ func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool {
v.AddArg2(base, mem) v.AddArg2(base, mem)
return true return true
} }
// match: (MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVXD x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVXD)
v.AddArg(x)
return true
}
return false return false
} }
func rewriteValueRISCV64_OpRISCV64MOVDnop(v *Value) bool { func rewriteValueRISCV64_OpRISCV64MOVDnop(v *Value) bool {
@ -5658,6 +5929,7 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
config := b.Func.Config config := b.Func.Config
typ := &b.Func.Config.Types
// match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) // match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem) // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@ -5701,6 +5973,27 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
v.AddArg2(base, mem) v.AddArg2(base, mem)
return true return true
} }
// match: (MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (MOVWUreg (FMVXS x))
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64MOVWUreg)
v0 := b.NewValue0(v_1.Pos, OpRISCV64FMVXS, typ.Int32)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false return false
} }
func rewriteValueRISCV64_OpRISCV64MOVWUreg(v *Value) bool { func rewriteValueRISCV64_OpRISCV64MOVWUreg(v *Value) bool {
@ -5891,6 +6184,25 @@ func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool {
v.AddArg2(base, mem) v.AddArg2(base, mem)
return true return true
} }
// match: (MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVXS x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVXS)
v.AddArg(x)
return true
}
return false return false
} }
func rewriteValueRISCV64_OpRISCV64MOVWreg(v *Value) bool { func rewriteValueRISCV64_OpRISCV64MOVWreg(v *Value) bool {

View file

@ -160,6 +160,7 @@ func fromFloat64(f64 float64) uint64 {
// loong64:"MOVV\tF.*, R.*" // loong64:"MOVV\tF.*, R.*"
// ppc64x:"MFVSRD" // ppc64x:"MFVSRD"
// mips64/hardfloat:"MOVV\tF.*, R.*" // mips64/hardfloat:"MOVV\tF.*, R.*"
// riscv64:"FMVXD"
return math.Float64bits(f64+1) + 1 return math.Float64bits(f64+1) + 1
} }
@ -168,6 +169,7 @@ func fromFloat32(f32 float32) uint32 {
// arm64:"FMOVS\tF.*, R.*" // arm64:"FMOVS\tF.*, R.*"
// loong64:"MOVW\tF.*, R.*" // loong64:"MOVW\tF.*, R.*"
// mips64/hardfloat:"MOVW\tF.*, R.*" // mips64/hardfloat:"MOVW\tF.*, R.*"
// riscv64:"FMVXW"
return math.Float32bits(f32+1) + 1 return math.Float32bits(f32+1) + 1
} }
@ -177,6 +179,7 @@ func toFloat64(u64 uint64) float64 {
// loong64:"MOVV\tR.*, F.*" // loong64:"MOVV\tR.*, F.*"
// ppc64x:"MTVSRD" // ppc64x:"MTVSRD"
// mips64/hardfloat:"MOVV\tR.*, F.*" // mips64/hardfloat:"MOVV\tR.*, F.*"
// riscv64:"FMVDX"
return math.Float64frombits(u64+1) + 1 return math.Float64frombits(u64+1) + 1
} }
@ -185,6 +188,7 @@ func toFloat32(u32 uint32) float32 {
// arm64:"FMOVS\tR.*, F.*" // arm64:"FMOVS\tR.*, F.*"
// loong64:"MOVW\tR.*, F.*" // loong64:"MOVW\tR.*, F.*"
// mips64/hardfloat:"MOVW\tR.*, F.*" // mips64/hardfloat:"MOVW\tR.*, F.*"
// riscv64:"FMVWX"
return math.Float32frombits(u32+1) + 1 return math.Float32frombits(u32+1) + 1
} }