mirror of
				https://github.com/golang/go.git
				synced 2025-10-31 16:50:58 +00:00 
			
		
		
		
	cmd/compile: optimise float <-> int register moves on riscv64
Use the FMV* instructions to move values between the floating point and
integer register files.
Note: I'm unsure why there is a slowdown in the Float32bits benchmark.
I've checked and an FMVXS instruction is being used as expected. There
are multiple loads and other instructions in the main loop.
goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
                    │ fmv-before.txt │            fmv-after.txt            │
                    │     sec/op     │   sec/op     vs base                │
Acos                     122.7n ± 0%   122.7n ± 0%        ~ (p=1.000 n=10)
Acosh                    197.2n ± 0%   191.5n ± 0%   -2.89% (p=0.000 n=10)
Asin                     122.7n ± 0%   122.7n ± 0%        ~ (p=0.474 n=10)
Asinh                    231.0n ± 0%   224.1n ± 0%   -2.99% (p=0.000 n=10)
Atan                     91.39n ± 0%   91.41n ± 0%        ~ (p=0.465 n=10)
Atanh                    210.3n ± 0%   203.4n ± 0%   -3.26% (p=0.000 n=10)
Atan2                    149.6n ± 0%   149.6n ± 0%        ~ (p=0.721 n=10)
Cbrt                     176.5n ± 0%   165.9n ± 0%   -6.01% (p=0.000 n=10)
Ceil                     25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Copysign                 3.756n ± 0%   3.756n ± 0%        ~ (p=0.149 n=10)
Cos                      95.15n ± 0%   95.15n ± 0%        ~ (p=0.374 n=10)
Cosh                     228.6n ± 0%   224.7n ± 0%   -1.71% (p=0.000 n=10)
Erf                      115.2n ± 0%   115.2n ± 0%        ~ (p=0.474 n=10)
Erfc                     116.4n ± 0%   116.4n ± 0%        ~ (p=0.628 n=10)
Erfinv                   133.3n ± 0%   133.3n ± 0%        ~ (p=1.000 n=10)
Erfcinv                  133.3n ± 0%   133.3n ± 0%        ~ (p=1.000 n=10)
Exp                      194.1n ± 0%   190.3n ± 0%   -1.93% (p=0.000 n=10)
ExpGo                    204.7n ± 0%   200.3n ± 0%   -2.15% (p=0.000 n=10)
Expm1                    137.7n ± 0%   135.2n ± 0%   -1.82% (p=0.000 n=10)
Exp2                     173.4n ± 0%   169.0n ± 0%   -2.54% (p=0.000 n=10)
Exp2Go                   182.8n ± 0%   178.4n ± 0%   -2.41% (p=0.000 n=10)
Abs                      3.756n ± 0%   3.756n ± 0%        ~ (p=0.157 n=10)
Dim                      12.52n ± 0%   12.52n ± 0%        ~ (p=0.737 n=10)
Floor                    25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Max                      21.29n ± 0%   20.03n ± 0%   -5.92% (p=0.000 n=10)
Min                      21.28n ± 0%   20.04n ± 0%   -5.85% (p=0.000 n=10)
Mod                      344.9n ± 0%   319.2n ± 0%   -7.45% (p=0.000 n=10)
Frexp                    55.71n ± 0%   48.85n ± 0%  -12.30% (p=0.000 n=10)
Gamma                    165.9n ± 0%   167.8n ± 0%   +1.15% (p=0.000 n=10)
Hypot                    73.24n ± 0%   70.74n ± 0%   -3.41% (p=0.000 n=10)
HypotGo                  84.50n ± 0%   82.63n ± 0%   -2.21% (p=0.000 n=10)
Ilogb                    49.45n ± 0%   45.70n ± 0%   -7.59% (p=0.000 n=10)
J0                       556.5n ± 0%   544.0n ± 0%   -2.25% (p=0.000 n=10)
J1                       555.3n ± 0%   542.8n ± 0%   -2.24% (p=0.000 n=10)
Jn                       1.181µ ± 0%   1.156µ ± 0%   -2.12% (p=0.000 n=10)
Ldexp                    59.47n ± 0%   53.84n ± 0%   -9.47% (p=0.000 n=10)
Lgamma                   167.2n ± 0%   154.6n ± 0%   -7.51% (p=0.000 n=10)
Log                      160.9n ± 0%   154.6n ± 0%   -3.92% (p=0.000 n=10)
Logb                     49.45n ± 0%   45.70n ± 0%   -7.58% (p=0.000 n=10)
Log1p                    147.1n ± 0%   137.1n ± 0%   -6.80% (p=0.000 n=10)
Log10                    162.1n ± 1%   154.6n ± 0%   -4.63% (p=0.000 n=10)
Log2                     66.99n ± 0%   60.72n ± 0%   -9.36% (p=0.000 n=10)
Modf                     29.42n ± 0%   26.29n ± 0%  -10.64% (p=0.000 n=10)
Nextafter32              41.95n ± 0%   37.88n ± 0%   -9.70% (p=0.000 n=10)
Nextafter64              38.82n ± 0%   33.49n ± 0%  -13.73% (p=0.000 n=10)
PowInt                   252.3n ± 0%   237.3n ± 0%   -5.95% (p=0.000 n=10)
PowFrac                  615.5n ± 0%   589.7n ± 0%   -4.19% (p=0.000 n=10)
Pow10Pos                 10.64n ± 0%   10.64n ± 0%        ~ (p=1.000 n=10)
Pow10Neg                 24.42n ± 0%   15.02n ± 0%  -38.49% (p=0.000 n=10)
Round                    21.91n ± 0%   18.16n ± 0%  -17.12% (p=0.000 n=10)
RoundToEven              24.42n ± 0%   21.29n ± 0%  -12.84% (p=0.000 n=10)
Remainder                308.0n ± 0%   291.2n ± 0%   -5.44% (p=0.000 n=10)
Signbit                  10.02n ± 0%   10.02n ± 0%        ~ (p=1.000 n=10)
Sin                      102.7n ± 0%   102.7n ± 0%        ~ (p=0.211 n=10)
Sincos                   124.0n ± 1%   123.3n ± 0%   -0.56% (p=0.002 n=10)
Sinh                     239.1n ± 0%   234.7n ± 0%   -1.84% (p=0.000 n=10)
SqrtIndirect             2.504n ± 0%   2.504n ± 0%        ~ (p=0.303 n=10)
SqrtLatency              15.03n ± 0%   15.02n ± 0%        ~ (p=0.598 n=10)
SqrtIndirectLatency      15.02n ± 0%   15.02n ± 0%        ~ (p=0.907 n=10)
SqrtGoLatency            165.3n ± 0%   157.2n ± 0%   -4.90% (p=0.000 n=10)
SqrtPrime                3.801µ ± 0%   3.802µ ± 0%        ~ (p=1.000 n=10)
Tan                      125.2n ± 0%   125.2n ± 0%        ~ (p=0.458 n=10)
Tanh                     244.2n ± 0%   239.9n ± 0%   -1.76% (p=0.000 n=10)
Trunc                    25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Y0                       550.2n ± 0%   538.1n ± 0%   -2.21% (p=0.000 n=10)
Y1                       552.8n ± 0%   540.6n ± 0%   -2.21% (p=0.000 n=10)
Yn                       1.168µ ± 0%   1.143µ ± 0%   -2.14% (p=0.000 n=10)
Float64bits              8.139n ± 0%   4.385n ± 0%  -46.13% (p=0.000 n=10)
Float64frombits          7.512n ± 0%   3.759n ± 0%  -49.96% (p=0.000 n=10)
Float32bits              8.138n ± 0%   9.393n ± 0%  +15.42% (p=0.000 n=10)
Float32frombits          7.513n ± 0%   3.757n ± 0%  -49.98% (p=0.000 n=10)
FMA                      3.756n ± 0%   3.756n ± 0%        ~ (p=0.246 n=10)
geomean                  77.43n        72.42n        -6.47%
Change-Id: I8dac69b1d17cb3d2af78d1c844d2b5d80000d667
Reviewed-on: https://go-review.googlesource.com/c/go/+/599235
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Michael Munday <mikemndy@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
			
			
This commit is contained in:
		
							parent
							
								
									7a1679d7ae
								
							
						
					
					
						commit
						fcc036f03b
					
				
					 6 changed files with 372 additions and 3 deletions
				
			
		|  | @ -417,7 +417,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { | |||
| 		p.To.Type = obj.TYPE_REG | ||||
| 		p.To.Reg = r | ||||
| 	case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FABSD, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD, | ||||
| 		ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX, | ||||
| 		ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVXS, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVXD, | ||||
| 		ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, | ||||
| 		ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, | ||||
| 		ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW, | ||||
|  |  | |||
|  | @ -299,6 +299,11 @@ | |||
| 	(base.Op != OpSB || !config.ctxt.Flag_dynlink) => | ||||
| 	(MOV(B|BU|H|HU|W|WU|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem) | ||||
| 
 | ||||
| (FMOV(W|D)load [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && | ||||
| 	is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && | ||||
| 	(base.Op != OpSB || !config.ctxt.Flag_dynlink) => | ||||
| 	(FMOV(W|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem) | ||||
| 
 | ||||
| (MOV(B|H|W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && | ||||
| 	is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && | ||||
| 	(base.Op != OpSB || !config.ctxt.Flag_dynlink) => | ||||
|  | @ -309,15 +314,26 @@ | |||
| 	(base.Op != OpSB || !config.ctxt.Flag_dynlink) => | ||||
| 	(MOV(B|H|W|D)storezero [off1+off2] {mergeSym(sym1,sym2)} base mem) | ||||
| 
 | ||||
| (FMOV(W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && | ||||
| 	is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && | ||||
| 	(base.Op != OpSB || !config.ctxt.Flag_dynlink) => | ||||
| 	(FMOV(W|D)store [off1+off2] {mergeSym(sym1,sym2)} base val mem) | ||||
| 
 | ||||
| (MOV(B|BU|H|HU|W|WU|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => | ||||
| 	(MOV(B|BU|H|HU|W|WU|D)load [off1+int32(off2)] {sym} base mem) | ||||
| 
 | ||||
| (FMOV(W|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => | ||||
| 	(FMOV(W|D)load [off1+int32(off2)] {sym} base mem) | ||||
| 
 | ||||
| (MOV(B|H|W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => | ||||
| 	(MOV(B|H|W|D)store [off1+int32(off2)] {sym} base val mem) | ||||
| 
 | ||||
| (MOV(B|H|W|D)storezero [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => | ||||
| 	(MOV(B|H|W|D)storezero [off1+int32(off2)] {sym} base mem) | ||||
| 
 | ||||
| (FMOV(W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => | ||||
| 	(FMOV(W|D)store [off1+int32(off2)] {sym} base val mem) | ||||
| 
 | ||||
| // Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis | ||||
| // with OffPtr -> ADDI. | ||||
| (ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x) | ||||
|  | @ -701,6 +717,13 @@ | |||
| (MOVHUreg <t> x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem) | ||||
| (MOVWUreg <t> x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem) | ||||
| 
 | ||||
| // Replace load from same location as preceding store with copy. | ||||
| (MOVDload  [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXD x) | ||||
| (FMOVDload [off] {sym} ptr1 (MOVDstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVDX x) | ||||
| (MOVWload  [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXS x) | ||||
| (MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVWUreg (FMVXS x)) | ||||
| (FMOVWload [off] {sym} ptr1 (MOVWstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVSX x) | ||||
| 
 | ||||
| // If a register move has only 1 use, just use the same register without emitting instruction | ||||
| // MOVnop does not emit an instruction, only for ensuring the type. | ||||
| (MOVDreg x) && x.Uses == 1 => (MOVDnop x) | ||||
|  |  | |||
|  | @ -453,7 +453,8 @@ func init() { | |||
| 		{name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS", commutative: true, typ: "Float32"},                                       // -(arg0 * arg1) - arg2 | ||||
| 		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"},                                                            // sqrt(arg0) | ||||
| 		{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"},                                                              // -arg0 | ||||
| 		{name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"},                                                              // reinterpret arg0 as float | ||||
| 		{name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"},                                                              // reinterpret arg0 as float32 | ||||
| 		{name: "FMVXS", argLength: 1, reg: fpgp, asm: "FMVXS", typ: "Int32"},                                                                // reinterpret arg0 as int32, sign extended to 64 bits | ||||
| 		{name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"},                                                            // float32(low 32 bits of arg0) | ||||
| 		{name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"},                                                            // float32(arg0) | ||||
| 		{name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"},                                                              // int32(arg0) | ||||
|  | @ -480,7 +481,8 @@ func init() { | |||
| 		{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD", typ: "Float64"},                                                              // -arg0 | ||||
| 		{name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD", typ: "Float64"},                                                              // abs(arg0) | ||||
| 		{name: "FSGNJD", argLength: 2, reg: fp21, asm: "FSGNJD", typ: "Float64"},                                                            // copy sign of arg1 to arg0 | ||||
| 		{name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"},                                                              // reinterpret arg0 as float | ||||
| 		{name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"},                                                              // reinterpret arg0 as float64 | ||||
| 		{name: "FMVXD", argLength: 1, reg: fpgp, asm: "FMVXD", typ: "Int64"},                                                                // reinterpret arg0 as int64 | ||||
| 		{name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"},                                                            // float64(low 32 bits of arg0) | ||||
| 		{name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"},                                                            // float64(arg0) | ||||
| 		{name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"},                                                              // int32(arg0) | ||||
|  |  | |||
|  | @ -2600,6 +2600,7 @@ const ( | |||
| 	OpRISCV64FSQRTS | ||||
| 	OpRISCV64FNEGS | ||||
| 	OpRISCV64FMVSX | ||||
| 	OpRISCV64FMVXS | ||||
| 	OpRISCV64FCVTSW | ||||
| 	OpRISCV64FCVTSL | ||||
| 	OpRISCV64FCVTWS | ||||
|  | @ -2625,6 +2626,7 @@ const ( | |||
| 	OpRISCV64FABSD | ||||
| 	OpRISCV64FSGNJD | ||||
| 	OpRISCV64FMVDX | ||||
| 	OpRISCV64FMVXD | ||||
| 	OpRISCV64FCVTDW | ||||
| 	OpRISCV64FCVTDL | ||||
| 	OpRISCV64FCVTWD | ||||
|  | @ -34985,6 +34987,19 @@ var opcodeTable = [...]opInfo{ | |||
| 			}, | ||||
| 		}, | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:   "FMVXS", | ||||
| 		argLen: 1, | ||||
| 		asm:    riscv.AFMVXS, | ||||
| 		reg: regInfo{ | ||||
| 			inputs: []inputInfo{ | ||||
| 				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 | ||||
| 			}, | ||||
| 			outputs: []outputInfo{ | ||||
| 				{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 | ||||
| 			}, | ||||
| 		}, | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:   "FCVTSW", | ||||
| 		argLen: 1, | ||||
|  | @ -35345,6 +35360,19 @@ var opcodeTable = [...]opInfo{ | |||
| 			}, | ||||
| 		}, | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:   "FMVXD", | ||||
| 		argLen: 1, | ||||
| 		asm:    riscv.AFMVXD, | ||||
| 		reg: regInfo{ | ||||
| 			inputs: []inputInfo{ | ||||
| 				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 | ||||
| 			}, | ||||
| 			outputs: []outputInfo{ | ||||
| 				{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 | ||||
| 			}, | ||||
| 		}, | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:   "FCVTDW", | ||||
| 		argLen: 1, | ||||
|  |  | |||
|  | @ -517,6 +517,14 @@ func rewriteValueRISCV64(v *Value) bool { | |||
| 		return rewriteValueRISCV64_OpRISCV64FMADDD(v) | ||||
| 	case OpRISCV64FMADDS: | ||||
| 		return rewriteValueRISCV64_OpRISCV64FMADDS(v) | ||||
| 	case OpRISCV64FMOVDload: | ||||
| 		return rewriteValueRISCV64_OpRISCV64FMOVDload(v) | ||||
| 	case OpRISCV64FMOVDstore: | ||||
| 		return rewriteValueRISCV64_OpRISCV64FMOVDstore(v) | ||||
| 	case OpRISCV64FMOVWload: | ||||
| 		return rewriteValueRISCV64_OpRISCV64FMOVWload(v) | ||||
| 	case OpRISCV64FMOVWstore: | ||||
| 		return rewriteValueRISCV64_OpRISCV64FMOVWstore(v) | ||||
| 	case OpRISCV64FMSUBD: | ||||
| 		return rewriteValueRISCV64_OpRISCV64FMSUBD(v) | ||||
| 	case OpRISCV64FMSUBS: | ||||
|  | @ -3844,6 +3852,250 @@ func rewriteValueRISCV64_OpRISCV64FMADDS(v *Value) bool { | |||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64FMOVDload(v *Value) bool { | ||||
| 	v_1 := v.Args[1] | ||||
| 	v_0 := v.Args[0] | ||||
| 	b := v.Block | ||||
| 	config := b.Func.Config | ||||
| 	// match: (FMOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) | ||||
| 	// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) | ||||
| 	// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym1 := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64MOVaddr { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt32(v_0.AuxInt) | ||||
| 		sym2 := auxToSym(v_0.Aux) | ||||
| 		base := v_0.Args[0] | ||||
| 		mem := v_1 | ||||
| 		if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVDload) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + off2) | ||||
| 		v.Aux = symToAux(mergeSym(sym1, sym2)) | ||||
| 		v.AddArg2(base, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (FMOVDload [off1] {sym} (ADDI [off2] base) mem) | ||||
| 	// cond: is32Bit(int64(off1)+off2) | ||||
| 	// result: (FMOVDload [off1+int32(off2)] {sym} base mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64ADDI { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt64(v_0.AuxInt) | ||||
| 		base := v_0.Args[0] | ||||
| 		mem := v_1 | ||||
| 		if !(is32Bit(int64(off1) + off2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVDload) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + int32(off2)) | ||||
| 		v.Aux = symToAux(sym) | ||||
| 		v.AddArg2(base, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) | ||||
| 	// cond: isSamePtr(ptr1, ptr2) | ||||
| 	// result: (FMVDX x) | ||||
| 	for { | ||||
| 		off := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		ptr1 := v_0 | ||||
| 		if v_1.Op != OpRISCV64MOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { | ||||
| 			break | ||||
| 		} | ||||
| 		x := v_1.Args[1] | ||||
| 		ptr2 := v_1.Args[0] | ||||
| 		if !(isSamePtr(ptr1, ptr2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMVDX) | ||||
| 		v.AddArg(x) | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64FMOVDstore(v *Value) bool { | ||||
| 	v_2 := v.Args[2] | ||||
| 	v_1 := v.Args[1] | ||||
| 	v_0 := v.Args[0] | ||||
| 	b := v.Block | ||||
| 	config := b.Func.Config | ||||
| 	// match: (FMOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) | ||||
| 	// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) | ||||
| 	// result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym1 := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64MOVaddr { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt32(v_0.AuxInt) | ||||
| 		sym2 := auxToSym(v_0.Aux) | ||||
| 		base := v_0.Args[0] | ||||
| 		val := v_1 | ||||
| 		mem := v_2 | ||||
| 		if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVDstore) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + off2) | ||||
| 		v.Aux = symToAux(mergeSym(sym1, sym2)) | ||||
| 		v.AddArg3(base, val, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (FMOVDstore [off1] {sym} (ADDI [off2] base) val mem) | ||||
| 	// cond: is32Bit(int64(off1)+off2) | ||||
| 	// result: (FMOVDstore [off1+int32(off2)] {sym} base val mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64ADDI { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt64(v_0.AuxInt) | ||||
| 		base := v_0.Args[0] | ||||
| 		val := v_1 | ||||
| 		mem := v_2 | ||||
| 		if !(is32Bit(int64(off1) + off2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVDstore) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + int32(off2)) | ||||
| 		v.Aux = symToAux(sym) | ||||
| 		v.AddArg3(base, val, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64FMOVWload(v *Value) bool { | ||||
| 	v_1 := v.Args[1] | ||||
| 	v_0 := v.Args[0] | ||||
| 	b := v.Block | ||||
| 	config := b.Func.Config | ||||
| 	// match: (FMOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) | ||||
| 	// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) | ||||
| 	// result: (FMOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym1 := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64MOVaddr { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt32(v_0.AuxInt) | ||||
| 		sym2 := auxToSym(v_0.Aux) | ||||
| 		base := v_0.Args[0] | ||||
| 		mem := v_1 | ||||
| 		if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVWload) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + off2) | ||||
| 		v.Aux = symToAux(mergeSym(sym1, sym2)) | ||||
| 		v.AddArg2(base, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (FMOVWload [off1] {sym} (ADDI [off2] base) mem) | ||||
| 	// cond: is32Bit(int64(off1)+off2) | ||||
| 	// result: (FMOVWload [off1+int32(off2)] {sym} base mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64ADDI { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt64(v_0.AuxInt) | ||||
| 		base := v_0.Args[0] | ||||
| 		mem := v_1 | ||||
| 		if !(is32Bit(int64(off1) + off2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVWload) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + int32(off2)) | ||||
| 		v.Aux = symToAux(sym) | ||||
| 		v.AddArg2(base, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) | ||||
| 	// cond: isSamePtr(ptr1, ptr2) | ||||
| 	// result: (FMVSX x) | ||||
| 	for { | ||||
| 		off := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		ptr1 := v_0 | ||||
| 		if v_1.Op != OpRISCV64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { | ||||
| 			break | ||||
| 		} | ||||
| 		x := v_1.Args[1] | ||||
| 		ptr2 := v_1.Args[0] | ||||
| 		if !(isSamePtr(ptr1, ptr2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMVSX) | ||||
| 		v.AddArg(x) | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64FMOVWstore(v *Value) bool { | ||||
| 	v_2 := v.Args[2] | ||||
| 	v_1 := v.Args[1] | ||||
| 	v_0 := v.Args[0] | ||||
| 	b := v.Block | ||||
| 	config := b.Func.Config | ||||
| 	// match: (FMOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) | ||||
| 	// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) | ||||
| 	// result: (FMOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym1 := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64MOVaddr { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt32(v_0.AuxInt) | ||||
| 		sym2 := auxToSym(v_0.Aux) | ||||
| 		base := v_0.Args[0] | ||||
| 		val := v_1 | ||||
| 		mem := v_2 | ||||
| 		if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVWstore) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + off2) | ||||
| 		v.Aux = symToAux(mergeSym(sym1, sym2)) | ||||
| 		v.AddArg3(base, val, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (FMOVWstore [off1] {sym} (ADDI [off2] base) val mem) | ||||
| 	// cond: is32Bit(int64(off1)+off2) | ||||
| 	// result: (FMOVWstore [off1+int32(off2)] {sym} base val mem) | ||||
| 	for { | ||||
| 		off1 := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		if v_0.Op != OpRISCV64ADDI { | ||||
| 			break | ||||
| 		} | ||||
| 		off2 := auxIntToInt64(v_0.AuxInt) | ||||
| 		base := v_0.Args[0] | ||||
| 		val := v_1 | ||||
| 		mem := v_2 | ||||
| 		if !(is32Bit(int64(off1) + off2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMOVWstore) | ||||
| 		v.AuxInt = int32ToAuxInt(off1 + int32(off2)) | ||||
| 		v.Aux = symToAux(sym) | ||||
| 		v.AddArg3(base, val, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64FMSUBD(v *Value) bool { | ||||
| 	v_2 := v.Args[2] | ||||
| 	v_1 := v.Args[1] | ||||
|  | @ -4977,6 +5229,25 @@ func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool { | |||
| 		v.AddArg2(base, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) | ||||
| 	// cond: isSamePtr(ptr1, ptr2) | ||||
| 	// result: (FMVXD x) | ||||
| 	for { | ||||
| 		off := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		ptr1 := v_0 | ||||
| 		if v_1.Op != OpRISCV64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { | ||||
| 			break | ||||
| 		} | ||||
| 		x := v_1.Args[1] | ||||
| 		ptr2 := v_1.Args[0] | ||||
| 		if !(isSamePtr(ptr1, ptr2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMVXD) | ||||
| 		v.AddArg(x) | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64MOVDnop(v *Value) bool { | ||||
|  | @ -5658,6 +5929,7 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool { | |||
| 	v_0 := v.Args[0] | ||||
| 	b := v.Block | ||||
| 	config := b.Func.Config | ||||
| 	typ := &b.Func.Config.Types | ||||
| 	// match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) | ||||
| 	// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) | ||||
| 	// result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem) | ||||
|  | @ -5701,6 +5973,27 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool { | |||
| 		v.AddArg2(base, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) | ||||
| 	// cond: isSamePtr(ptr1, ptr2) | ||||
| 	// result: (MOVWUreg (FMVXS x)) | ||||
| 	for { | ||||
| 		off := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		ptr1 := v_0 | ||||
| 		if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { | ||||
| 			break | ||||
| 		} | ||||
| 		x := v_1.Args[1] | ||||
| 		ptr2 := v_1.Args[0] | ||||
| 		if !(isSamePtr(ptr1, ptr2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64MOVWUreg) | ||||
| 		v0 := b.NewValue0(v_1.Pos, OpRISCV64FMVXS, typ.Int32) | ||||
| 		v0.AddArg(x) | ||||
| 		v.AddArg(v0) | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64MOVWUreg(v *Value) bool { | ||||
|  | @ -5891,6 +6184,25 @@ func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool { | |||
| 		v.AddArg2(base, mem) | ||||
| 		return true | ||||
| 	} | ||||
| 	// match: (MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) | ||||
| 	// cond: isSamePtr(ptr1, ptr2) | ||||
| 	// result: (FMVXS x) | ||||
| 	for { | ||||
| 		off := auxIntToInt32(v.AuxInt) | ||||
| 		sym := auxToSym(v.Aux) | ||||
| 		ptr1 := v_0 | ||||
| 		if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { | ||||
| 			break | ||||
| 		} | ||||
| 		x := v_1.Args[1] | ||||
| 		ptr2 := v_1.Args[0] | ||||
| 		if !(isSamePtr(ptr1, ptr2)) { | ||||
| 			break | ||||
| 		} | ||||
| 		v.reset(OpRISCV64FMVXS) | ||||
| 		v.AddArg(x) | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| func rewriteValueRISCV64_OpRISCV64MOVWreg(v *Value) bool { | ||||
|  |  | |||
|  | @ -160,6 +160,7 @@ func fromFloat64(f64 float64) uint64 { | |||
| 	// loong64:"MOVV\tF.*, R.*" | ||||
| 	// ppc64x:"MFVSRD" | ||||
| 	// mips64/hardfloat:"MOVV\tF.*, R.*" | ||||
| 	// riscv64:"FMVXD" | ||||
| 	return math.Float64bits(f64+1) + 1 | ||||
| } | ||||
| 
 | ||||
|  | @ -168,6 +169,7 @@ func fromFloat32(f32 float32) uint32 { | |||
| 	// arm64:"FMOVS\tF.*, R.*" | ||||
| 	// loong64:"MOVW\tF.*, R.*" | ||||
| 	// mips64/hardfloat:"MOVW\tF.*, R.*" | ||||
| 	// riscv64:"FMVXW" | ||||
| 	return math.Float32bits(f32+1) + 1 | ||||
| } | ||||
| 
 | ||||
|  | @ -177,6 +179,7 @@ func toFloat64(u64 uint64) float64 { | |||
| 	// loong64:"MOVV\tR.*, F.*" | ||||
| 	// ppc64x:"MTVSRD" | ||||
| 	// mips64/hardfloat:"MOVV\tR.*, F.*" | ||||
| 	// riscv64:"FMVDX" | ||||
| 	return math.Float64frombits(u64+1) + 1 | ||||
| } | ||||
| 
 | ||||
|  | @ -185,6 +188,7 @@ func toFloat32(u32 uint32) float32 { | |||
| 	// arm64:"FMOVS\tR.*, F.*" | ||||
| 	// loong64:"MOVW\tR.*, F.*" | ||||
| 	// mips64/hardfloat:"MOVW\tR.*, F.*" | ||||
| 	// riscv64:"FMVWX" | ||||
| 	return math.Float32frombits(u32+1) + 1 | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Michael Munday
						Michael Munday