go/src/cmd/compile/internal/ssa/rewriteRISCV64.go

4410 lines
99 KiB
Go
Raw Normal View History

// Code generated from gen/RISCV64.rules; DO NOT EDIT.
// generated with: cd gen; go run *.go
package ssa
import "math"
import "cmd/compile/internal/types"
func rewriteValueRISCV64(v *Value) bool {
switch v.Op {
case OpAdd16:
v.Op = OpRISCV64ADD
return true
case OpAdd32:
v.Op = OpRISCV64ADD
return true
case OpAdd32F:
v.Op = OpRISCV64FADDS
return true
case OpAdd64:
v.Op = OpRISCV64ADD
return true
case OpAdd64F:
v.Op = OpRISCV64FADDD
return true
case OpAdd8:
v.Op = OpRISCV64ADD
return true
case OpAddPtr:
v.Op = OpRISCV64ADD
return true
case OpAddr:
v.Op = OpRISCV64MOVaddr
return true
case OpAnd16:
v.Op = OpRISCV64AND
return true
case OpAnd32:
v.Op = OpRISCV64AND
return true
case OpAnd64:
v.Op = OpRISCV64AND
return true
case OpAnd8:
v.Op = OpRISCV64AND
return true
case OpAndB:
v.Op = OpRISCV64AND
return true
case OpAvg64u:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpAvg64u(v)
case OpClosureCall:
v.Op = OpRISCV64CALLclosure
return true
case OpCom16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpCom16(v)
case OpCom32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpCom32(v)
case OpCom64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpCom64(v)
case OpCom8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpCom8(v)
case OpConst16:
v.Op = OpRISCV64MOVHconst
return true
case OpConst32:
v.Op = OpRISCV64MOVWconst
return true
case OpConst32F:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpConst32F(v)
case OpConst64:
v.Op = OpRISCV64MOVDconst
return true
case OpConst64F:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpConst64F(v)
case OpConst8:
v.Op = OpRISCV64MOVBconst
return true
case OpConstBool:
v.Op = OpRISCV64MOVBconst
return true
case OpConstNil:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpConstNil(v)
case OpConvert:
v.Op = OpRISCV64MOVconvert
return true
case OpCvt32Fto32:
v.Op = OpRISCV64FCVTWS
return true
case OpCvt32Fto64:
v.Op = OpRISCV64FCVTLS
return true
case OpCvt32Fto64F:
v.Op = OpRISCV64FCVTDS
return true
case OpCvt32to32F:
v.Op = OpRISCV64FCVTSW
return true
case OpCvt32to64F:
v.Op = OpRISCV64FCVTDW
return true
case OpCvt64Fto32:
v.Op = OpRISCV64FCVTWD
return true
case OpCvt64Fto32F:
v.Op = OpRISCV64FCVTSD
return true
case OpCvt64Fto64:
v.Op = OpRISCV64FCVTLD
return true
case OpCvt64to32F:
v.Op = OpRISCV64FCVTSL
return true
case OpCvt64to64F:
v.Op = OpRISCV64FCVTDL
return true
case OpCvtBoolToUint8:
v.Op = OpCopy
return true
case OpDiv16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpDiv16(v)
case OpDiv16u:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpDiv16u(v)
case OpDiv32:
v.Op = OpRISCV64DIVW
return true
case OpDiv32F:
v.Op = OpRISCV64FDIVS
return true
case OpDiv32u:
v.Op = OpRISCV64DIVUW
return true
case OpDiv64:
v.Op = OpRISCV64DIV
return true
case OpDiv64F:
v.Op = OpRISCV64FDIVD
return true
case OpDiv64u:
v.Op = OpRISCV64DIVU
return true
case OpDiv8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpDiv8(v)
case OpDiv8u:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpDiv8u(v)
case OpEq16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpEq16(v)
case OpEq32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpEq32(v)
case OpEq32F:
v.Op = OpRISCV64FEQS
return true
case OpEq64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpEq64(v)
case OpEq64F:
v.Op = OpRISCV64FEQD
return true
case OpEq8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpEq8(v)
case OpEqB:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpEqB(v)
case OpEqPtr:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpEqPtr(v)
case OpGeq32F:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpGeq32F(v)
case OpGeq64F:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpGeq64F(v)
case OpGetCallerPC:
v.Op = OpRISCV64LoweredGetCallerPC
return true
case OpGetCallerSP:
v.Op = OpRISCV64LoweredGetCallerSP
return true
case OpGetClosurePtr:
v.Op = OpRISCV64LoweredGetClosurePtr
return true
case OpGreater32F:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpGreater32F(v)
case OpGreater64F:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpGreater64F(v)
case OpHmul32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpHmul32(v)
case OpHmul32u:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpHmul32u(v)
case OpHmul64:
v.Op = OpRISCV64MULH
return true
case OpHmul64u:
v.Op = OpRISCV64MULHU
return true
case OpInterCall:
v.Op = OpRISCV64CALLinter
return true
case OpIsInBounds:
v.Op = OpLess64U
return true
case OpIsNonNil:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpIsNonNil(v)
case OpIsSliceInBounds:
v.Op = OpLeq64U
return true
case OpLeq16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq16(v)
case OpLeq16U:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq16U(v)
case OpLeq32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq32(v)
case OpLeq32F:
v.Op = OpRISCV64FLES
return true
case OpLeq32U:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq32U(v)
case OpLeq64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq64(v)
case OpLeq64F:
v.Op = OpRISCV64FLED
return true
case OpLeq64U:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq64U(v)
case OpLeq8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq8(v)
case OpLeq8U:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLeq8U(v)
case OpLess16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLess16(v)
case OpLess16U:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLess16U(v)
case OpLess32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLess32(v)
case OpLess32F:
v.Op = OpRISCV64FLTS
return true
case OpLess32U:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLess32U(v)
case OpLess64:
v.Op = OpRISCV64SLT
return true
case OpLess64F:
v.Op = OpRISCV64FLTD
return true
case OpLess64U:
v.Op = OpRISCV64SLTU
return true
case OpLess8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLess8(v)
case OpLess8U:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLess8U(v)
case OpLoad:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLoad(v)
case OpLocalAddr:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLocalAddr(v)
case OpLsh16x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh16x16(v)
case OpLsh16x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh16x32(v)
case OpLsh16x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh16x64(v)
case OpLsh16x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh16x8(v)
case OpLsh32x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh32x16(v)
case OpLsh32x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh32x32(v)
case OpLsh32x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh32x64(v)
case OpLsh32x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh32x8(v)
case OpLsh64x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh64x16(v)
case OpLsh64x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh64x32(v)
case OpLsh64x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh64x64(v)
case OpLsh64x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh64x8(v)
case OpLsh8x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh8x16(v)
case OpLsh8x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh8x32(v)
case OpLsh8x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh8x64(v)
case OpLsh8x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpLsh8x8(v)
case OpMod16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpMod16(v)
case OpMod16u:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpMod16u(v)
case OpMod32:
v.Op = OpRISCV64REMW
return true
case OpMod32u:
v.Op = OpRISCV64REMUW
return true
case OpMod64:
v.Op = OpRISCV64REM
return true
case OpMod64u:
v.Op = OpRISCV64REMU
return true
case OpMod8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpMod8(v)
case OpMod8u:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpMod8u(v)
case OpMove:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpMove(v)
case OpMul16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpMul16(v)
case OpMul32:
v.Op = OpRISCV64MULW
return true
case OpMul32F:
v.Op = OpRISCV64FMULS
return true
case OpMul64:
v.Op = OpRISCV64MUL
return true
case OpMul64F:
v.Op = OpRISCV64FMULD
return true
case OpMul8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpMul8(v)
case OpNeg16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeg16(v)
case OpNeg32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeg32(v)
case OpNeg32F:
v.Op = OpRISCV64FNEGS
return true
case OpNeg64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeg64(v)
case OpNeg64F:
v.Op = OpRISCV64FNEGD
return true
case OpNeg8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeg8(v)
case OpNeq16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeq16(v)
case OpNeq32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeq32(v)
case OpNeq32F:
v.Op = OpRISCV64FNES
return true
case OpNeq64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeq64(v)
case OpNeq64F:
v.Op = OpRISCV64FNED
return true
case OpNeq8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeq8(v)
case OpNeqB:
v.Op = OpRISCV64XOR
return true
case OpNeqPtr:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNeqPtr(v)
case OpNilCheck:
v.Op = OpRISCV64LoweredNilCheck
return true
case OpNot:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpNot(v)
case OpOffPtr:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpOffPtr(v)
case OpOr16:
v.Op = OpRISCV64OR
return true
case OpOr32:
v.Op = OpRISCV64OR
return true
case OpOr64:
v.Op = OpRISCV64OR
return true
case OpOr8:
v.Op = OpRISCV64OR
return true
case OpOrB:
v.Op = OpRISCV64OR
return true
case OpPanicBounds:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpPanicBounds(v)
case OpRISCV64ADD:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64ADD(v)
case OpRISCV64ADDI:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64ADDI(v)
case OpRISCV64MOVBUload:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVBUload(v)
case OpRISCV64MOVBload:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVBload(v)
case OpRISCV64MOVBstore:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVBstore(v)
case OpRISCV64MOVDconst:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVDconst(v)
case OpRISCV64MOVDload:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVDload(v)
case OpRISCV64MOVDstore:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVDstore(v)
case OpRISCV64MOVHUload:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVHUload(v)
case OpRISCV64MOVHload:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVHload(v)
case OpRISCV64MOVHstore:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVHstore(v)
case OpRISCV64MOVWUload:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVWUload(v)
case OpRISCV64MOVWload:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVWload(v)
case OpRISCV64MOVWstore:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRISCV64MOVWstore(v)
case OpRISCV64SUB:
return rewriteValueRISCV64_OpRISCV64SUB(v)
case OpRISCV64SUBW:
return rewriteValueRISCV64_OpRISCV64SUBW(v)
case OpRotateLeft16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRotateLeft16(v)
case OpRotateLeft32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRotateLeft32(v)
case OpRotateLeft64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRotateLeft64(v)
case OpRotateLeft8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRotateLeft8(v)
case OpRound32F:
v.Op = OpCopy
return true
case OpRound64F:
v.Op = OpCopy
return true
case OpRsh16Ux16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16Ux16(v)
case OpRsh16Ux32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16Ux32(v)
case OpRsh16Ux64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16Ux64(v)
case OpRsh16Ux8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16Ux8(v)
case OpRsh16x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16x16(v)
case OpRsh16x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16x32(v)
case OpRsh16x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16x64(v)
case OpRsh16x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh16x8(v)
case OpRsh32Ux16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32Ux16(v)
case OpRsh32Ux32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32Ux32(v)
case OpRsh32Ux64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32Ux64(v)
case OpRsh32Ux8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32Ux8(v)
case OpRsh32x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32x16(v)
case OpRsh32x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32x32(v)
case OpRsh32x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32x64(v)
case OpRsh32x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh32x8(v)
case OpRsh64Ux16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64Ux16(v)
case OpRsh64Ux32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64Ux32(v)
case OpRsh64Ux64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64Ux64(v)
case OpRsh64Ux8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64Ux8(v)
case OpRsh64x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64x16(v)
case OpRsh64x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64x32(v)
case OpRsh64x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64x64(v)
case OpRsh64x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh64x8(v)
case OpRsh8Ux16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8Ux16(v)
case OpRsh8Ux32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8Ux32(v)
case OpRsh8Ux64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8Ux64(v)
case OpRsh8Ux8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8Ux8(v)
case OpRsh8x16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8x16(v)
case OpRsh8x32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8x32(v)
case OpRsh8x64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8x64(v)
case OpRsh8x8:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpRsh8x8(v)
case OpSignExt16to32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpSignExt16to32(v)
case OpSignExt16to64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpSignExt16to64(v)
case OpSignExt32to64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpSignExt32to64(v)
case OpSignExt8to16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpSignExt8to16(v)
case OpSignExt8to32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpSignExt8to32(v)
case OpSignExt8to64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpSignExt8to64(v)
case OpSlicemask:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpSlicemask(v)
case OpSqrt:
v.Op = OpRISCV64FSQRTD
return true
case OpStaticCall:
v.Op = OpRISCV64CALLstatic
return true
case OpStore:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpStore(v)
case OpSub16:
v.Op = OpRISCV64SUB
return true
case OpSub32:
v.Op = OpRISCV64SUB
return true
case OpSub32F:
v.Op = OpRISCV64FSUBS
return true
case OpSub64:
v.Op = OpRISCV64SUB
return true
case OpSub64F:
v.Op = OpRISCV64FSUBD
return true
case OpSub8:
v.Op = OpRISCV64SUB
return true
case OpSubPtr:
v.Op = OpRISCV64SUB
return true
case OpTrunc16to8:
v.Op = OpCopy
return true
case OpTrunc32to16:
v.Op = OpCopy
return true
case OpTrunc32to8:
v.Op = OpCopy
return true
case OpTrunc64to16:
v.Op = OpCopy
return true
case OpTrunc64to32:
v.Op = OpCopy
return true
case OpTrunc64to8:
v.Op = OpCopy
return true
case OpWB:
v.Op = OpRISCV64LoweredWB
return true
case OpXor16:
v.Op = OpRISCV64XOR
return true
case OpXor32:
v.Op = OpRISCV64XOR
return true
case OpXor64:
v.Op = OpRISCV64XOR
return true
case OpXor8:
v.Op = OpRISCV64XOR
return true
case OpZero:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpZero(v)
case OpZeroExt16to32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpZeroExt16to32(v)
case OpZeroExt16to64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpZeroExt16to64(v)
case OpZeroExt32to64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpZeroExt32to64(v)
case OpZeroExt8to16:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpZeroExt8to16(v)
case OpZeroExt8to32:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpZeroExt8to32(v)
case OpZeroExt8to64:
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
return rewriteValueRISCV64_OpZeroExt8to64(v)
}
return false
}
cmd/compile: remove chunking of rewrite rules

We added chunking of rewrite rules to speed up compiling package SSA.
This series of changes has significantly shrunk the number of rewrite
rules, and they are no longer being added nearly as fast.

Now that we are sharing v.Args across multiple rewrite rules,
there is additional benefit to having more rules in a single function.

Removing chunking now has an incidental impact on compiling package SSA,
marginally speeds up other compilation, shrinks the cmd/compile binary,
and simplifies the code.

name        old time/op   new time/op   delta
Template    211ms ± 2%    210ms ± 2%    -0.50%  (p=0.000 n=91+97)
Unicode     81.9ms ± 3%   81.8ms ± 3%   ~       (p=0.179 n=96+91)
GoTypes     731ms ± 2%    731ms ± 1%    ~       (p=0.442 n=94+96)
Compiler    3.43s ± 2%    3.41s ± 2%    -0.36%  (p=0.001 n=98+94)
SSA         8.30s ± 2%    8.32s ± 2%    +0.19%  (p=0.034 n=94+95)
Flate       135ms ± 2%    134ms ± 1%    -0.30%  (p=0.006 n=98+94)
GoParser    167ms ± 1%    167ms ± 1%    -0.22%  (p=0.001 n=92+94)
Reflect     453ms ± 2%    453ms ± 3%    ~       (p=0.306 n=98+97)
Tar         184ms ± 2%    183ms ± 2%    -0.31%  (p=0.012 n=94+94)
XML         249ms ± 2%    248ms ± 1%    -0.26%  (p=0.002 n=96+92)
[Geo mean]  419ms         418ms         -0.21%

name        old user-time/op  new user-time/op  delta
Template    273ms ± 2%        272ms ± 2%        -0.46%  (p=0.000 n=93+96)
Unicode     116ms ± 4%        117ms ± 4%        ~       (p=0.433 n=98+98)
GoTypes     977ms ± 2%        977ms ± 1%        ~       (p=0.971 n=92+99)
Compiler    4.56s ± 6%        4.53s ± 6%        ~       (p=0.081 n=100+100)
SSA         11.1s ± 2%        11.1s ± 2%        ~       (p=0.064 n=99+96)
Flate       167ms ± 2%        167ms ± 1%        -0.24%  (p=0.004 n=95+96)
GoParser    203ms ± 1%        203ms ± 2%        -0.14%  (p=0.049 n=96+97)
Reflect     595ms ± 2%        595ms ± 2%        ~       (p=0.544 n=95+92)
Tar         225ms ± 2%        224ms ± 2%        ~       (p=0.562 n=99+99)
XML         312ms ± 2%        311ms ± 1%        ~       (p=0.050 n=97+93)
[Geo mean]  543ms             542ms             -0.13%

Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa
Reviewed-on: https://go-review.googlesource.com/c/go/+/216220
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpAvg64u(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Avg64u <t> x y)
// result: (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64ADD)
v0 := b.NewValue0(v.Pos, OpRISCV64ADD, t)
v1 := b.NewValue0(v.Pos, OpRISCV64SRLI, t)
v1.AuxInt = 1
v1.AddArg(x)
v2 := b.NewValue0(v.Pos, OpRISCV64SRLI, t)
v2.AuxInt = 1
v2.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, v2)
v3 := b.NewValue0(v.Pos, OpRISCV64ANDI, t)
v3.AuxInt = 1
v4 := b.NewValue0(v.Pos, OpRISCV64AND, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v4.AddArg2(x, y)
v3.AddArg(v4)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v3)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpCom16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (Com16 x)
// result: (XORI [int64(-1)] x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64XORI)
v.AuxInt = int64(-1)
v.AddArg(x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpCom32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (Com32 x)
// result: (XORI [int64(-1)] x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64XORI)
v.AuxInt = int64(-1)
v.AddArg(x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpCom64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (Com64 x)
// result: (XORI [int64(-1)] x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64XORI)
v.AuxInt = int64(-1)
v.AddArg(x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpCom8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (Com8 x)
// result: (XORI [int64(-1)] x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64XORI)
v.AuxInt = int64(-1)
v.AddArg(x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
// rewriteValueRISCV64_OpConst32F rewrites a generic Const32F value in place
// into an FMVSX whose single argument is a freshly created MOVWconst.
//
//	match:  (Const32F [val])
//	result: (FMVSX (MOVWconst [int64(int32(math.Float32bits(float32(math.Float64frombits(uint64(val))))))]))
//
// The incoming AuxInt is decoded as float64 bits, narrowed to float32, and
// the resulting 32-bit pattern is sign-extended into the new constant's
// AuxInt. The rule is unconditional; the function always returns true.
func rewriteValueRISCV64_OpConst32F(v *Value) bool {
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// Read the auxiliary value before v is reset.
	raw := v.AuxInt
	v.reset(OpRISCV64FMVSX)

	// Same conversion chain as the rule, spelled out one step at a time.
	asFloat64 := math.Float64frombits(uint64(raw))
	bits32 := math.Float32bits(float32(asFloat64))

	konst := blk.NewValue0(v.Pos, OpRISCV64MOVWconst, cfgTypes.UInt32)
	konst.AuxInt = int64(int32(bits32))
	v.AddArg(konst)
	return true
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
// rewriteValueRISCV64_OpConst64F rewrites a generic Const64F value in place
// into an FMVDX whose single argument is a freshly created MOVDconst carrying
// the original AuxInt unchanged.
//
//	match:  (Const64F [val])
//	result: (FMVDX (MOVDconst [val]))
//
// The rule is unconditional; the function always returns true.
func rewriteValueRISCV64_OpConst64F(v *Value) bool {
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// Read the auxiliary value before v is reset.
	raw := v.AuxInt
	v.reset(OpRISCV64FMVDX)

	konst := blk.NewValue0(v.Pos, OpRISCV64MOVDconst, cfgTypes.UInt64)
	konst.AuxInt = raw
	v.AddArg(konst)
	return true
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
// rewriteValueRISCV64_OpConstNil rewrites a generic ConstNil value in place
// into a MOVDconst with AuxInt 0.
//
//	match:  (ConstNil)
//	result: (MOVDconst [0])
//
// The rule is unconditional; the function always returns true.
func rewriteValueRISCV64_OpConstNil(v *Value) bool {
	v.reset(OpRISCV64MOVDconst)
	v.AuxInt = 0
	return true
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpDiv16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Div16 x y)
// result: (DIVW (SignExt16to32 x) (SignExt16to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64DIVW)
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpDiv16u(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Div16u x y)
// result: (DIVUW (ZeroExt16to32 x) (ZeroExt16to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64DIVUW)
v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpDiv8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Div8 x y)
// result: (DIVW (SignExt8to32 x) (SignExt8to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64DIVW)
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpDiv8u(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Div8u x y)
// result: (DIVUW (ZeroExt8to32 x) (ZeroExt8to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64DIVUW)
v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpEq16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Eq16 x y)
// result: (SEQZ (ZeroExt16to64 (SUB <x.Type> x y)))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(x, y)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpEq32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Eq32 x y)
// result: (SEQZ (SUBW <x.Type> x y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpEq64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Eq64 x y)
// result: (SEQZ (SUB <x.Type> x y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpEq8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Eq8 x y)
// result: (SEQZ (ZeroExt8to64 (SUB <x.Type> x y)))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(x, y)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpEqB(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (EqB x y)
// result: (XORI [1] (XOR <typ.Bool> x y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64XORI)
v.AuxInt = 1
v0 := b.NewValue0(v.Pos, OpRISCV64XOR, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpEqPtr(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (EqPtr x y)
// result: (SEQZ (SUB <x.Type> x y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpGeq32F(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Geq32F x y)
// result: (FLES y x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64FLES)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(y, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpGeq64F(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Geq64F x y)
// result: (FLED y x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64FLED)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(y, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpGreater32F(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Greater32F x y)
// result: (FLTS y x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64FLTS)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(y, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpGreater64F(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Greater64F x y)
// result: (FLTD y x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64FLTD)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(y, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpHmul32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Hmul32 x y)
// result: (SRAI [32] (MUL (SignExt32to64 x) (SignExt32to64 y)))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRAI)
v.AuxInt = 32
v0 := b.NewValue0(v.Pos, OpRISCV64MUL, typ.Int64)
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v1.AddArg(x)
v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v2.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, v2)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpHmul32u(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Hmul32u x y)
// result: (SRLI [32] (MUL (ZeroExt32to64 x) (ZeroExt32to64 y)))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRLI)
v.AuxInt = 32
v0 := b.NewValue0(v.Pos, OpRISCV64MUL, typ.Int64)
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v1.AddArg(x)
v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v2.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, v2)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpIsNonNil(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (IsNonNil p)
// result: (NeqPtr (MOVDconst) p)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
p := v_0
v.reset(OpNeqPtr)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, p)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq16 x y)
// result: (Not (Less16 y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess16, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq16U(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq16U x y)
// result: (Not (Less16U y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess16U, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq32 x y)
// result: (Not (Less32 y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess32, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq32U(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq32U x y)
// result: (Not (Less32U y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess32U, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq64 x y)
// result: (Not (Less64 y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess64, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq64U(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq64U x y)
// result: (Not (Less64U y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess64U, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq8 x y)
// result: (Not (Less8 y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess8, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLeq8U(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Leq8U x y)
// result: (Not (Less8U y x))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpNot)
v0 := b.NewValue0(v.Pos, OpLess8U, typ.Bool)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLess16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Less16 x y)
// result: (SLT (SignExt16to64 x) (SignExt16to64 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SLT)
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLess16U(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Less16U x y)
// result: (SLTU (ZeroExt16to64 x) (ZeroExt16to64 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SLTU)
v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLess32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Less32 x y)
// result: (SLT (SignExt32to64 x) (SignExt32to64 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SLT)
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLess32U(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Less32U x y)
// result: (SLTU (ZeroExt32to64 x) (ZeroExt32to64 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SLTU)
v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLess8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Less8 x y)
// result: (SLT (SignExt8to64 x) (SignExt8to64 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SLT)
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLess8U(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Less8U x y)
// result: (SLTU (ZeroExt8to64 x) (ZeroExt8to64 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SLTU)
v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLoad(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Load <t> ptr mem)
// cond: t.IsBoolean()
// result: (MOVBUload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(t.IsBoolean()) {
break
}
v.reset(OpRISCV64MOVBUload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: ( is8BitInt(t) && isSigned(t))
// result: (MOVBload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is8BitInt(t) && isSigned(t)) {
break
}
v.reset(OpRISCV64MOVBload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: ( is8BitInt(t) && !isSigned(t))
// result: (MOVBUload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is8BitInt(t) && !isSigned(t)) {
break
}
v.reset(OpRISCV64MOVBUload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: (is16BitInt(t) && isSigned(t))
// result: (MOVHload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is16BitInt(t) && isSigned(t)) {
break
}
v.reset(OpRISCV64MOVHload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: (is16BitInt(t) && !isSigned(t))
// result: (MOVHUload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is16BitInt(t) && !isSigned(t)) {
break
}
v.reset(OpRISCV64MOVHUload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: (is32BitInt(t) && isSigned(t))
// result: (MOVWload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is32BitInt(t) && isSigned(t)) {
break
}
v.reset(OpRISCV64MOVWload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: (is32BitInt(t) && !isSigned(t))
// result: (MOVWUload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is32BitInt(t) && !isSigned(t)) {
break
}
v.reset(OpRISCV64MOVWUload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: (is64BitInt(t) || isPtr(t))
// result: (MOVDload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is64BitInt(t) || isPtr(t)) {
break
}
v.reset(OpRISCV64MOVDload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: is32BitFloat(t)
// result: (FMOVWload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is32BitFloat(t)) {
break
}
v.reset(OpRISCV64FMOVWload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
// match: (Load <t> ptr mem)
// cond: is64BitFloat(t)
// result: (FMOVDload ptr mem)
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
if !(is64BitFloat(t)) {
break
}
v.reset(OpRISCV64FMOVDload)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(ptr, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLocalAddr(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (LocalAddr {sym} base _)
// result: (MOVaddr {sym} base)
for {
sym := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
base := v_0
v.reset(OpRISCV64MOVaddr)
v.Aux = sym
v.AddArg(base)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh16x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh16x16 <t> x y)
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg16, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh16x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh16x32 <t> x y)
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg16, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh16x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Lsh16x64 <t> x y)
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg16, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v2.AddArg(y)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh16x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh16x8 <t> x y)
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg16, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh32x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh32x16 <t> x y)
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg32, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh32x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh32x32 <t> x y)
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg32, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh32x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Lsh32x64 <t> x y)
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg32, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v2.AddArg(y)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh32x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh32x8 <t> x y)
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg32, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh64x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh64x16 <t> x y)
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh64x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh64x32 <t> x y)
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh64x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Lsh64x64 <t> x y)
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v2.AddArg(y)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh64x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh64x8 <t> x y)
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh8x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh8x16 <t> x y)
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg8, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh8x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh8x32 <t> x y)
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg8, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh8x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Lsh8x64 <t> x y)
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg8, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v2.AddArg(y)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpLsh8x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Lsh8x8 <t> x y)
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg8, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpMod16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Mod16 x y)
// result: (REMW (SignExt16to32 x) (SignExt16to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64REMW)
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpMod16u(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Mod16u x y)
// result: (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64REMUW)
v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpMod8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Mod8 x y)
// result: (REMW (SignExt8to32 x) (SignExt8to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64REMW)
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpMod8u(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Mod8u x y)
// result: (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64REMUW)
v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpMove(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
typ := &b.Func.Config.Types
// match: (Move [0] _ _ mem)
// result: mem
for {
if v.AuxInt != 0 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_2
v.copyOf(mem)
return true
}
// match: (Move [1] dst src mem)
// result: (MOVBstore dst (MOVBload src mem) mem)
for {
if v.AuxInt != 1 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
dst := v_0
src := v_1
mem := v_2
v.reset(OpRISCV64MOVBstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(src, mem)
v.AddArg3(dst, v0, mem)
return true
}
// match: (Move [2] dst src mem)
// result: (MOVHstore dst (MOVHload src mem) mem)
for {
if v.AuxInt != 2 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
dst := v_0
src := v_1
mem := v_2
v.reset(OpRISCV64MOVHstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(src, mem)
v.AddArg3(dst, v0, mem)
return true
}
// match: (Move [4] dst src mem)
// result: (MOVWstore dst (MOVWload src mem) mem)
for {
if v.AuxInt != 4 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
dst := v_0
src := v_1
mem := v_2
v.reset(OpRISCV64MOVWstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(src, mem)
v.AddArg3(dst, v0, mem)
return true
}
// match: (Move [8] dst src mem)
// result: (MOVDstore dst (MOVDload src mem) mem)
for {
if v.AuxInt != 8 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
dst := v_0
src := v_1
mem := v_2
v.reset(OpRISCV64MOVDstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(src, mem)
v.AddArg3(dst, v0, mem)
return true
}
// match: (Move [s] {t} dst src mem)
// result: (LoweredMove [t.(*types.Type).Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.(*types.Type).Alignment(), config)] src) mem)
for {
s := v.AuxInt
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
dst := v_0
src := v_1
mem := v_2
v.reset(OpRISCV64LoweredMove)
v.AuxInt = t.(*types.Type).Alignment()
v0 := b.NewValue0(v.Pos, OpRISCV64ADDI, src.Type)
v0.AuxInt = s - moveSize(t.(*types.Type).Alignment(), config)
v0.AddArg(src)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg4(dst, src, v0, mem)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpMul16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Mul16 x y)
// result: (MULW (SignExt16to32 x) (SignExt16to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64MULW)
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpMul8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Mul8 x y)
// result: (MULW (SignExt8to32 x) (SignExt8to32 y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64MULW)
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
v1.AddArg(y)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeg16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Neg16 x)
// result: (SUB (MOVHconst) x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SUB)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeg32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Neg32 x)
// result: (SUB (MOVWconst) x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SUB)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeg64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Neg64 x)
// result: (SUB (MOVDconst) x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SUB)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeg8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Neg8 x)
// result: (SUB (MOVBconst) x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SUB)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeq16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Neq16 x y)
// result: (SNEZ (ZeroExt16to64 (SUB <x.Type> x y)))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(x, y)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeq32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Neq32 x y)
// result: (SNEZ (SUBW <x.Type> x y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeq64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Neq64 x y)
// result: (SNEZ (SUB <x.Type> x y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeq8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Neq8 x y)
// result: (SNEZ (ZeroExt8to64 (SUB <x.Type> x y)))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(x, y)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNeqPtr(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (NeqPtr x y)
// result: (SNEZ (SUB <x.Type> x y))
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpNot(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (Not x)
// result: (XORI [1] x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64XORI)
v.AuxInt = 1
v.AddArg(x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpOffPtr(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (OffPtr [off] ptr:(SP))
// result: (MOVaddr [off] ptr)
for {
off := v.AuxInt
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
if ptr.Op != OpSP {
break
}
v.reset(OpRISCV64MOVaddr)
v.AuxInt = off
v.AddArg(ptr)
return true
}
// match: (OffPtr [off] ptr)
// cond: is32Bit(off)
// result: (ADDI [off] ptr)
for {
off := v.AuxInt
ptr := v_0
if !(is32Bit(off)) {
break
}
v.reset(OpRISCV64ADDI)
v.AuxInt = off
v.AddArg(ptr)
return true
}
// match: (OffPtr [off] ptr)
// result: (ADD (MOVDconst [off]) ptr)
for {
off := v.AuxInt
ptr := v_0
v.reset(OpRISCV64ADD)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v0.AuxInt = off
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, ptr)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpPanicBounds(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := v.AuxInt
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpRISCV64LoweredPanicBoundsA)
v.AuxInt = kind
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := v.AuxInt
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpRISCV64LoweredPanicBoundsB)
v.AuxInt = kind
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := v.AuxInt
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpRISCV64LoweredPanicBoundsC)
v.AuxInt = kind
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(x, y, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64ADD(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (ADD (MOVDconst [off]) ptr)
// cond: is32Bit(off)
// result: (ADDI [off] ptr)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
cmd/compile: use loops to handle commutative ops in rules Prior to this change, we generated additional rules at rulegen time for all possible combinations of args to commutative ops. This is simple and works well, but leads to lots of generated rules. This in turn has increased the size of the compiler, made it hard to compile package ssa on small machines, and provided a disincentive to mark some ops as commutative. This change reworks how we handle commutative ops. Instead of generating a rule per argument permutation, we generate a series of nested loops, one for each commutative op. Each loop tries both possible argument orderings. I also considered attempting to canonicalize the inputs to the rewrite rules. However, because either or both arguments might be nothing more than an identifier, and because there can be arbitrary conditions to evaluate during matching, I did not see how to proceed. The duplicate rule detection now sorts arguments to commutative ops, so that it can detect commutative-only duplicates. There may be further optimizations to the new generated code. In particular, we may not be removing as many bounds checks as before; I have not investigated deeply. If more work here is needed, we could do it with more hints or with improvements to the prove pass. This change has almost no impact on the generated code. It does not pass toolstash-check, however. In a handful of functions, for reasons I do not understand, there are minor position changes. For the entire series ending at this change, there is negligible compiler performance impact. The compiler binary shrinks by about 15%, and package ssa shrinks by about 25%. Package ssa also compiles ~25% faster with ~25% less memory. Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4 Reviewed-on: https://go-review.googlesource.com/c/go/+/213703 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
if v_0.Op != OpRISCV64MOVDconst {
continue
}
off := v_0.AuxInt
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_1
cmd/compile: use loops to handle commutative ops in rules Prior to this change, we generated additional rules at rulegen time for all possible combinations of args to commutative ops. This is simple and works well, but leads to lots of generated rules. This in turn has increased the size of the compiler, made it hard to compile package ssa on small machines, and provided a disincentive to mark some ops as commutative. This change reworks how we handle commutative ops. Instead of generating a rule per argument permutation, we generate a series of nested loops, one for each commutative op. Each loop tries both possible argument orderings. I also considered attempting to canonicalize the inputs to the rewrite rules. However, because either or both arguments might be nothing more than an identifier, and because there can be arbitrary conditions to evaluate during matching, I did not see how to proceed. The duplicate rule detection now sorts arguments to commutative ops, so that it can detect commutative-only duplicates. There may be further optimizations to the new generated code. In particular, we may not be removing as many bounds checks as before; I have not investigated deeply. If more work here is needed, we could do it with more hints or with improvements to the prove pass. This change has almost no impact on the generated code. It does not pass toolstash-check, however. In a handful of functions, for reasons I do not understand, there are minor position changes. For the entire series ending at this change, there is negligible compiler performance impact. The compiler binary shrinks by about 15%, and package ssa shrinks by about 25%. Package ssa also compiles ~25% faster with ~25% less memory. Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4 Reviewed-on: https://go-review.googlesource.com/c/go/+/213703 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
if !(is32Bit(off)) {
continue
}
v.reset(OpRISCV64ADDI)
v.AuxInt = off
v.AddArg(ptr)
return true
}
cmd/compile: use loops to handle commutative ops in rules Prior to this change, we generated additional rules at rulegen time for all possible combinations of args to commutative ops. This is simple and works well, but leads to lots of generated rules. This in turn has increased the size of the compiler, made it hard to compile package ssa on small machines, and provided a disincentive to mark some ops as commutative. This change reworks how we handle commutative ops. Instead of generating a rule per argument permutation, we generate a series of nested loops, one for each commutative op. Each loop tries both possible argument orderings. I also considered attempting to canonicalize the inputs to the rewrite rules. However, because either or both arguments might be nothing more than an identifier, and because there can be arbitrary conditions to evaluate during matching, I did not see how to proceed. The duplicate rule detection now sorts arguments to commutative ops, so that it can detect commutative-only duplicates. There may be further optimizations to the new generated code. In particular, we may not be removing as many bounds checks as before; I have not investigated deeply. If more work here is needed, we could do it with more hints or with improvements to the prove pass. This change has almost no impact on the generated code. It does not pass toolstash-check, however. In a handful of functions, for reasons I do not understand, there are minor position changes. For the entire series ending at this change, there is negligible compiler performance impact. The compiler binary shrinks by about 15%, and package ssa shrinks by about 25%. Package ssa also compiles ~25% faster with ~25% less memory. Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4 Reviewed-on: https://go-review.googlesource.com/c/go/+/213703 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
break
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64ADDI(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (ADDI [c] (MOVaddr [d] {s} x))
// cond: is32Bit(c+d)
// result: (MOVaddr [c+d] {s} x)
for {
c := v.AuxInt
if v_0.Op != OpRISCV64MOVaddr {
break
}
d := v_0.AuxInt
s := v_0.Aux
x := v_0.Args[0]
if !(is32Bit(c + d)) {
break
}
v.reset(OpRISCV64MOVaddr)
v.AuxInt = c + d
v.Aux = s
v.AddArg(x)
return true
}
// match: (ADDI [0] x)
// result: x
for {
if v.AuxInt != 0 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.copyOf(x)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVBUload(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVBUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVBUload)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
// match: (MOVBUload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(off1+off2)
// result: (MOVBUload [off1+off2] {sym} base mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVBUload)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVBload(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVBload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVBload)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
// match: (MOVBload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(off1+off2)
// result: (MOVBload [off1+off2] {sym} base mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVBload)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVBstore(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVBstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVBstore)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
// match: (MOVBstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(off1+off2)
// result: (MOVBstore [off1+off2] {sym} base val mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVBstore)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVDconst(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (MOVDconst <t> [c])
// cond: !is32Bit(c) && int32(c) < 0
// result: (ADD (SLLI <t> [32] (MOVDconst [c>>32+1])) (MOVDconst [int64(int32(c))]))
for {
t := v.Type
c := v.AuxInt
if !(!is32Bit(c) && int32(c) < 0) {
break
}
v.reset(OpRISCV64ADD)
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 32
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v1.AuxInt = c>>32 + 1
v0.AddArg(v1)
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v2.AuxInt = int64(int32(c))
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
// match: (MOVDconst <t> [c])
// cond: !is32Bit(c) && int32(c) >= 0
// result: (ADD (SLLI <t> [32] (MOVDconst [c>>32+0])) (MOVDconst [int64(int32(c))]))
for {
t := v.Type
c := v.AuxInt
if !(!is32Bit(c) && int32(c) >= 0) {
break
}
v.reset(OpRISCV64ADD)
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 32
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v1.AuxInt = c>>32 + 0
v0.AddArg(v1)
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v2.AuxInt = int64(int32(c))
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVDload)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
// match: (MOVDload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(off1+off2)
// result: (MOVDload [off1+off2] {sym} base mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVDload)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVDstore(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVDstore)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
// match: (MOVDstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(off1+off2)
// result: (MOVDstore [off1+off2] {sym} base val mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVDstore)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVHUload(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVHUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVHUload)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
// match: (MOVHUload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(off1+off2)
// result: (MOVHUload [off1+off2] {sym} base mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVHUload)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVHload(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVHload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVHload)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
// match: (MOVHload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(off1+off2)
// result: (MOVHload [off1+off2] {sym} base mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVHload)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVHstore(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVHstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVHstore)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
// match: (MOVHstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(off1+off2)
// result: (MOVHstore [off1+off2] {sym} base val mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVHstore)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVWUload)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
// match: (MOVWUload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(off1+off2)
// result: (MOVWUload [off1+off2] {sym} base mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVWUload)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVWload)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
// match: (MOVWload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(off1+off2)
// result: (MOVWload [off1+off2] {sym} base mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVWload)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(base, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRISCV64MOVWstore(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := v.AuxInt
sym1 := v.Aux
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpRISCV64MOVWstore)
v.AuxInt = off1 + off2
v.Aux = mergeSym(sym1, sym2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
// match: (MOVWstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(off1+off2)
// result: (MOVWstore [off1+off2] {sym} base val mem)
for {
off1 := v.AuxInt
sym := v.Aux
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
val := v_1
mem := v_2
if !(is32Bit(off1 + off2)) {
break
}
v.reset(OpRISCV64MOVWstore)
v.AuxInt = off1 + off2
v.Aux = sym
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(base, val, mem)
return true
}
return false
}
// rewriteValueRISCV64_OpRISCV64SUB simplifies a SUB whose second operand is a
// constant (MOVBconst, MOVHconst, MOVWconst or MOVDconst). It reports whether
// v was rewritten in place.
//
// The subtract-zero rules are tried first. A zero constant is also expected to
// satisfy is32Bit(-val), so if the general constant rules ran first they would
// always win, turning (SUB x 0) into (ADDI [0] x) and leaving the zero rules
// unreachable.
//
// NOTE(review): this file is generated from gen/RISCV64.rules; the same rule
// ordering has to be applied there, otherwise regenerating will undo it.
func rewriteValueRISCV64_OpRISCV64SUB(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	// match: (SUB x (MOVBconst [0]))
	// result: x
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVBconst || v_1.AuxInt != 0 {
			break
		}
		v.copyOf(x)
		return true
	}
	// match: (SUB x (MOVHconst [0]))
	// result: x
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVHconst || v_1.AuxInt != 0 {
			break
		}
		v.copyOf(x)
		return true
	}
	// match: (SUB x (MOVWconst [0]))
	// result: x
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVWconst || v_1.AuxInt != 0 {
			break
		}
		v.copyOf(x)
		return true
	}
	// match: (SUB x (MOVDconst [0]))
	// result: x
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVDconst || v_1.AuxInt != 0 {
			break
		}
		v.copyOf(x)
		return true
	}
	// From here on the constant is known to be non-zero (or not a constant
	// at all): replace the subtraction by an ADDI of the negated constant.
	// match: (SUB x (MOVBconst [val]))
	// cond: is32Bit(-val)
	// result: (ADDI [-val] x)
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVBconst {
			break
		}
		val := v_1.AuxInt
		if !(is32Bit(-val)) {
			break
		}
		v.reset(OpRISCV64ADDI)
		v.AuxInt = -val
		v.AddArg(x)
		return true
	}
	// match: (SUB x (MOVHconst [val]))
	// cond: is32Bit(-val)
	// result: (ADDI [-val] x)
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVHconst {
			break
		}
		val := v_1.AuxInt
		if !(is32Bit(-val)) {
			break
		}
		v.reset(OpRISCV64ADDI)
		v.AuxInt = -val
		v.AddArg(x)
		return true
	}
	// match: (SUB x (MOVWconst [val]))
	// cond: is32Bit(-val)
	// result: (ADDI [-val] x)
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVWconst {
			break
		}
		val := v_1.AuxInt
		if !(is32Bit(-val)) {
			break
		}
		v.reset(OpRISCV64ADDI)
		v.AuxInt = -val
		v.AddArg(x)
		return true
	}
	// match: (SUB x (MOVDconst [val]))
	// cond: is32Bit(-val)
	// result: (ADDI [-val] x)
	for {
		x := v_0
		if v_1.Op != OpRISCV64MOVDconst {
			break
		}
		val := v_1.AuxInt
		if !(is32Bit(-val)) {
			break
		}
		v.reset(OpRISCV64ADDI)
		v.AuxInt = -val
		v.AddArg(x)
		return true
	}
	// No rule applied; v is left untouched.
	return false
}
// rewriteValueRISCV64_OpRISCV64SUBW applies the rewrite rules for the RISCV64
// SUBW op to v and reports whether v was rewritten.
func rewriteValueRISCV64_OpRISCV64SUBW(v *Value) bool {
	subtrahend := v.Args[1]
	x := v.Args[0]
	// match: (SUBW x (MOVWconst [0]))
	// result: (ADDIW [0] x)
	if subtrahend.Op == OpRISCV64MOVWconst && subtrahend.AuxInt == 0 {
		v.reset(OpRISCV64ADDIW)
		v.AuxInt = 0
		v.AddArg(x)
		return true
	}
	return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRotateLeft16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (RotateLeft16 <t> x (MOVHconst [c]))
// result: (Or16 (Lsh16x64 <t> x (MOVHconst [c&15])) (Rsh16Ux64 <t> x (MOVHconst [-c&15])))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
if v_1.Op != OpRISCV64MOVHconst {
break
}
c := v_1.AuxInt
v.reset(OpOr16)
v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
v1 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
v1.AuxInt = c & 15
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, v1)
v2 := b.NewValue0(v.Pos, OpRsh16Ux64, t)
v3 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
v3.AuxInt = -c & 15
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v2.AddArg2(x, v3)
v.AddArg2(v0, v2)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRotateLeft32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (RotateLeft32 <t> x (MOVWconst [c]))
// result: (Or32 (Lsh32x64 <t> x (MOVWconst [c&31])) (Rsh32Ux64 <t> x (MOVWconst [-c&31])))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
if v_1.Op != OpRISCV64MOVWconst {
break
}
c := v_1.AuxInt
v.reset(OpOr32)
v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
v1 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
v1.AuxInt = c & 31
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, v1)
v2 := b.NewValue0(v.Pos, OpRsh32Ux64, t)
v3 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
v3.AuxInt = -c & 31
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v2.AddArg2(x, v3)
v.AddArg2(v0, v2)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRotateLeft64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (RotateLeft64 <t> x (MOVDconst [c]))
// result: (Or64 (Lsh64x64 <t> x (MOVDconst [c&63])) (Rsh64Ux64 <t> x (MOVDconst [-c&63])))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
break
}
c := v_1.AuxInt
v.reset(OpOr64)
v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v1.AuxInt = c & 63
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, v1)
v2 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
v3 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v3.AuxInt = -c & 63
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v2.AddArg2(x, v3)
v.AddArg2(v0, v2)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRotateLeft8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (RotateLeft8 <t> x (MOVBconst [c]))
// result: (Or8 (Lsh8x64 <t> x (MOVBconst [c&7])) (Rsh8Ux64 <t> x (MOVBconst [-c&7])))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
if v_1.Op != OpRISCV64MOVBconst {
break
}
c := v_1.AuxInt
v.reset(OpOr8)
v0 := b.NewValue0(v.Pos, OpLsh8x64, t)
v1 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
v1.AuxInt = c & 7
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, v1)
v2 := b.NewValue0(v.Pos, OpRsh8Ux64, t)
v3 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
v3.AuxInt = -c & 7
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v2.AddArg2(x, v3)
v.AddArg2(v0, v2)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16Ux16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16Ux16 <t> x y)
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg16, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16Ux32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16Ux32 <t> x y)
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg16, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16Ux64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16Ux64 <t> x y)
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg16, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v3.AddArg(y)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16Ux8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16Ux8 <t> x y)
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg16, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16x16 <t> x y)
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16x32 <t> x y)
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16x64 <t> x y)
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v3.AddArg(y)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh16x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh16x8 <t> x y)
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32Ux16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32Ux16 <t> x y)
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg32, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32Ux32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32Ux32 <t> x y)
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg32, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32Ux64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32Ux64 <t> x y)
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg32, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v3.AddArg(y)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32Ux8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32Ux8 <t> x y)
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg32, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32x16 <t> x y)
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32x32 <t> x y)
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32x64 <t> x y)
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v3.AddArg(y)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh32x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh32x8 <t> x y)
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh64Ux16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh64Ux16 <t> x y)
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh64Ux32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh64Ux32 <t> x y)
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh64Ux64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Rsh64Ux64 <t> x y)
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v2.AddArg(y)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh64Ux8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh64Ux8 <t> x y)
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(x, y)
v1 := b.NewValue0(v.Pos, OpNeg64, t)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
// rewriteValueRISCV64_OpRsh64x16 applies the single rule shown in the
// match/result comments below: it rewrites v, matched as (Rsh64x16 <t> x y),
// in place into an SRA of x by an adjusted shift amount. The rule has no
// condition, so the function always returns true.
//
// The adjusted amount is (OR y (ADDI [-1] (SLTIU [64] (ZeroExt16to64 y)))).
// NOTE(review): per the RISC-V ISA, SLTIU [64] should yield 1 when the
// zero-extended y is below 64 and 0 otherwise, so ADDI [-1] produces 0 or
// all ones and the OR either leaves y unchanged or saturates it --
// presumably so that shift counts of 64 or more give pure sign fill.
// Confirm against gen/RISCV64.rules and the ISA manual.
//
// NOTE(review): the non-Go lines inside this function are commit-log text
// from a blame view interleaved with the source; they are left untouched.
func rewriteValueRISCV64_OpRsh64x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
// Both top-level args are loaded into locals at function entry; index 1 is
// read before index 0.
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// typ is the function config's type table; it supplies UInt64 for the
// zero-extended shift amount below.
typ := &b.Func.Config.Types
// match: (Rsh64x16 <t> x y)
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
// The loop body runs once: the rule is unconditional and ends in return true.
for {
// Capture the result type before v is reset.
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
// v itself becomes the SRA node and keeps type t.
v.reset(OpRISCV64SRA)
v.Type = t
// v0 = (OR <y.Type> y v1)
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
// v1 = (ADDI <y.Type> [-1] v2)
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v1.AuxInt = -1
// v2 = (SLTIU <y.Type> [64] v3)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v2.AuxInt = 64
// v3 = (ZeroExt16to64 y), typed UInt64
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
// Wire operands from the innermost value outward.
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, v1)
// Final shape: v = (SRA <t> x v0).
v.AddArg2(x, v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
// rewriteValueRISCV64_OpRsh64x32 applies the single rule shown in the
// match/result comments below: it rewrites v, matched as (Rsh64x32 <t> x y),
// in place into an SRA of x by an adjusted shift amount. The rule has no
// condition, so the function always returns true.
//
// The adjusted amount is (OR y (ADDI [-1] (SLTIU [64] (ZeroExt32to64 y)))).
// NOTE(review): per the RISC-V ISA, SLTIU [64] should yield 1 when the
// zero-extended y is below 64 and 0 otherwise, so ADDI [-1] produces 0 or
// all ones and the OR either leaves y unchanged or saturates it --
// presumably so that shift counts of 64 or more give pure sign fill.
// Confirm against gen/RISCV64.rules and the ISA manual.
//
// NOTE(review): the non-Go lines inside this function are commit-log text
// from a blame view interleaved with the source; they are left untouched.
func rewriteValueRISCV64_OpRsh64x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
// Both top-level args are loaded into locals at function entry; index 1 is
// read before index 0.
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// typ is the function config's type table; it supplies UInt64 for the
// zero-extended shift amount below.
typ := &b.Func.Config.Types
// match: (Rsh64x32 <t> x y)
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
// The loop body runs once: the rule is unconditional and ends in return true.
for {
// Capture the result type before v is reset.
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
// v itself becomes the SRA node and keeps type t.
v.reset(OpRISCV64SRA)
v.Type = t
// v0 = (OR <y.Type> y v1)
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
// v1 = (ADDI <y.Type> [-1] v2)
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v1.AuxInt = -1
// v2 = (SLTIU <y.Type> [64] v3)
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v2.AuxInt = 64
// v3 = (ZeroExt32to64 y), typed UInt64
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
// Wire operands from the innermost value outward.
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, v1)
// Final shape: v = (SRA <t> x v0).
v.AddArg2(x, v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh64x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Rsh64x64 <t> x y)
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v1.AuxInt = -1
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v2.AuxInt = 64
v2.AddArg(y)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, v1)
v.AddArg2(x, v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh64x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh64x8 <t> x y)
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v1.AuxInt = -1
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v2.AuxInt = 64
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v3.AddArg(y)
v2.AddArg(v3)
v1.AddArg(v2)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(y, v1)
v.AddArg2(x, v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8Ux16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8Ux16 <t> x y)
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg8, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8Ux32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8Ux32 <t> x y)
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg8, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8Ux64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8Ux64 <t> x y)
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] y)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg8, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v3.AddArg(y)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8Ux8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8Ux8 <t> x y)
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64AND)
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v1.AddArg(x)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(v1, y)
v2 := b.NewValue0(v.Pos, OpNeg8, t)
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg2(v0, v2)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8x16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8x16 <t> x y)
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8x32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8x32 <t> x y)
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8x64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8x64 <t> x y)
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v3.AddArg(y)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpRsh8x8(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Rsh8x8 <t> x y)
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
y := v_1
v.reset(OpRISCV64SRA)
v.Type = t
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
v2.AuxInt = -1
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
v3.AuxInt = 64
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
v4.AddArg(y)
v3.AddArg(v4)
v2.AddArg(v3)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v1.AddArg2(y, v2)
v.AddArg2(v0, v1)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpSignExt16to32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (SignExt16to32 <t> x)
// result: (SRAI [48] (SLLI <t> [48] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRAI)
v.AuxInt = 48
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 48
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpSignExt16to64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (SignExt16to64 <t> x)
// result: (SRAI [48] (SLLI <t> [48] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRAI)
v.AuxInt = 48
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 48
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpSignExt32to64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
// match: (SignExt32to64 <t> x)
// result: (ADDIW [0] x)
for {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64ADDIW)
v.AuxInt = 0
v.AddArg(x)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpSignExt8to16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (SignExt8to16 <t> x)
// result: (SRAI [56] (SLLI <t> [56] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRAI)
v.AuxInt = 56
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 56
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpSignExt8to32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (SignExt8to32 <t> x)
// result: (SRAI [56] (SLLI <t> [56] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRAI)
v.AuxInt = 56
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 56
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpSignExt8to64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (SignExt8to64 <t> x)
// result: (SRAI [56] (SLLI <t> [56] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRAI)
v.AuxInt = 56
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 56
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpSlicemask(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (Slicemask <t> x)
// result: (XORI [-1] (SRAI <t> [63] (ADDI <t> [-1] x)))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64XORI)
v.AuxInt = -1
v0 := b.NewValue0(v.Pos, OpRISCV64SRAI, t)
v0.AuxInt = 63
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, t)
v1.AuxInt = -1
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpStore(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Store {t} ptr val mem)
// cond: t.(*types.Type).Size() == 1
// result: (MOVBstore ptr val mem)
for {
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
val := v_1
mem := v_2
if !(t.(*types.Type).Size() == 1) {
break
}
v.reset(OpRISCV64MOVBstore)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, val, mem)
return true
}
// match: (Store {t} ptr val mem)
// cond: t.(*types.Type).Size() == 2
// result: (MOVHstore ptr val mem)
for {
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
val := v_1
mem := v_2
if !(t.(*types.Type).Size() == 2) {
break
}
v.reset(OpRISCV64MOVHstore)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, val, mem)
return true
}
// match: (Store {t} ptr val mem)
// cond: t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type)
// result: (MOVWstore ptr val mem)
for {
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
val := v_1
mem := v_2
if !(t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type)) {
break
}
v.reset(OpRISCV64MOVWstore)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, val, mem)
return true
}
// match: (Store {t} ptr val mem)
// cond: t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type)
// result: (MOVDstore ptr val mem)
for {
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
val := v_1
mem := v_2
if !(t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type)) {
break
}
v.reset(OpRISCV64MOVDstore)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, val, mem)
return true
}
// match: (Store {t} ptr val mem)
// cond: t.(*types.Type).Size() == 4 && is32BitFloat(val.Type)
// result: (FMOVWstore ptr val mem)
for {
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
val := v_1
mem := v_2
if !(t.(*types.Type).Size() == 4 && is32BitFloat(val.Type)) {
break
}
v.reset(OpRISCV64FMOVWstore)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, val, mem)
return true
}
// match: (Store {t} ptr val mem)
// cond: t.(*types.Type).Size() == 8 && is64BitFloat(val.Type)
// result: (FMOVDstore ptr val mem)
for {
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
val := v_1
mem := v_2
if !(t.(*types.Type).Size() == 8 && is64BitFloat(val.Type)) {
break
}
v.reset(OpRISCV64FMOVDstore)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, val, mem)
return true
}
return false
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpZero(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
typ := &b.Func.Config.Types
// match: (Zero [0] _ mem)
// result: mem
for {
if v.AuxInt != 0 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
mem := v_1
v.copyOf(mem)
return true
}
// match: (Zero [1] ptr mem)
// result: (MOVBstore ptr (MOVBconst) mem)
for {
if v.AuxInt != 1 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
v.reset(OpRISCV64MOVBstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, v0, mem)
return true
}
// match: (Zero [2] ptr mem)
// result: (MOVHstore ptr (MOVHconst) mem)
for {
if v.AuxInt != 2 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
v.reset(OpRISCV64MOVHstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, v0, mem)
return true
}
// match: (Zero [4] ptr mem)
// result: (MOVWstore ptr (MOVWconst) mem)
for {
if v.AuxInt != 4 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
v.reset(OpRISCV64MOVWstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, v0, mem)
return true
}
// match: (Zero [8] ptr mem)
// result: (MOVDstore ptr (MOVDconst) mem)
for {
if v.AuxInt != 8 {
break
}
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
v.reset(OpRISCV64MOVDstore)
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v.AddArg3(ptr, v0, mem)
return true
}
// match: (Zero [s] {t} ptr mem)
// result: (LoweredZero [t.(*types.Type).Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.(*types.Type).Alignment(), config)])) mem)
for {
s := v.AuxInt
t := v.Aux
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
ptr := v_0
mem := v_1
v.reset(OpRISCV64LoweredZero)
v.AuxInt = t.(*types.Type).Alignment()
v0 := b.NewValue0(v.Pos, OpRISCV64ADD, ptr.Type)
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
v1.AuxInt = s - moveSize(t.(*types.Type).Alignment(), config)
cmd/compile: add specialized AddArgN functions for rewrite rules This shrinks the compiler without impacting performance. (The performance-sensitive part of rewrite rules is the non-match case.) Passes toolstash-check -all. Executable size: file before after Δ % compile 20356168 20163960 -192208 -0.944% total 115599376 115407168 -192208 -0.166% Text size: file before after Δ % cmd/compile/internal/ssa.s 3928309 3778774 -149535 -3.807% total 18862943 18713408 -149535 -0.793% Memory allocated compiling package SSA: SSA 12.7M ± 0% 12.5M ± 0% -1.74% (p=0.008 n=5+5) Compiler speed impact: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 2% ~ (p=0.832 n=49+49) Unicode 82.8ms ± 2% 83.2ms ± 2% +0.44% (p=0.022 n=46+49) GoTypes 726ms ± 1% 728ms ± 2% ~ (p=0.076 n=46+48) Compiler 3.39s ± 2% 3.40s ± 2% ~ (p=0.633 n=48+49) SSA 7.71s ± 1% 7.65s ± 1% -0.78% (p=0.000 n=45+44) Flate 134ms ± 1% 134ms ± 1% ~ (p=0.195 n=50+49) GoParser 167ms ± 1% 167ms ± 1% ~ (p=0.390 n=47+47) Reflect 453ms ± 3% 452ms ± 2% ~ (p=0.492 n=48+49) Tar 184ms ± 3% 184ms ± 2% ~ (p=0.862 n=50+48) XML 248ms ± 2% 248ms ± 2% ~ (p=0.096 n=49+47) [Geo mean] 415ms 415ms -0.03% name old user-time/op new user-time/op delta Template 273ms ± 1% 273ms ± 2% ~ (p=0.711 n=48+48) Unicode 117ms ± 6% 117ms ± 5% ~ (p=0.633 n=50+50) GoTypes 972ms ± 2% 974ms ± 1% +0.29% (p=0.016 n=47+49) Compiler 4.46s ± 6% 4.51s ± 6% ~ (p=0.093 n=50+50) SSA 10.4s ± 1% 10.3s ± 2% -0.94% (p=0.000 n=45+50) Flate 166ms ± 2% 167ms ± 2% ~ (p=0.148 n=49+48) GoParser 202ms ± 1% 202ms ± 2% -0.28% (p=0.014 n=47+49) Reflect 594ms ± 2% 594ms ± 2% ~ (p=0.717 n=48+49) Tar 224ms ± 2% 224ms ± 2% ~ (p=0.805 n=50+49) XML 311ms ± 1% 310ms ± 1% ~ (p=0.177 n=49+48) [Geo mean] 537ms 537ms +0.01% Change-Id: I562b9f349b34ddcff01771769e6dbbc80604da7a Reviewed-on: https://go-review.googlesource.com/c/go/+/221237 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-02-26 11:29:34 -08:00
v0.AddArg2(ptr, v1)
v.AddArg3(ptr, v0, mem)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpZeroExt16to32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (ZeroExt16to32 <t> x)
// result: (SRLI [48] (SLLI <t> [48] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRLI)
v.AuxInt = 48
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 48
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpZeroExt16to64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (ZeroExt16to64 <t> x)
// result: (SRLI [48] (SLLI <t> [48] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRLI)
v.AuxInt = 48
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 48
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpZeroExt32to64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (ZeroExt32to64 <t> x)
// result: (SRLI [32] (SLLI <t> [32] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRLI)
v.AuxInt = 32
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 32
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpZeroExt8to16(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (ZeroExt8to16 <t> x)
// result: (SRLI [56] (SLLI <t> [56] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRLI)
v.AuxInt = 56
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 56
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpZeroExt8to32(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (ZeroExt8to32 <t> x)
// result: (SRLI [56] (SLLI <t> [56] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRLI)
v.AuxInt = 56
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 56
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
cmd/compile: remove chunking of rewrite rules We added chunking of rewrite rules to speed up compiling package SSA. This series of changes has significantly shrunk the number of rewrite rules, and they are no longer being added nearly as fast. Now that we are sharing v.Args across multiple rewrite rules, there is additional benefit to having more rules in a single function. Removing chunking now has an incidental impact on compiling package SSA, marginally speeds up other compilation, shrinks the cmd/compile binary, and simplifies the code. name old time/op new time/op delta Template 211ms ± 2% 210ms ± 2% -0.50% (p=0.000 n=91+97) Unicode 81.9ms ± 3% 81.8ms ± 3% ~ (p=0.179 n=96+91) GoTypes 731ms ± 2% 731ms ± 1% ~ (p=0.442 n=94+96) Compiler 3.43s ± 2% 3.41s ± 2% -0.36% (p=0.001 n=98+94) SSA 8.30s ± 2% 8.32s ± 2% +0.19% (p=0.034 n=94+95) Flate 135ms ± 2% 134ms ± 1% -0.30% (p=0.006 n=98+94) GoParser 167ms ± 1% 167ms ± 1% -0.22% (p=0.001 n=92+94) Reflect 453ms ± 2% 453ms ± 3% ~ (p=0.306 n=98+97) Tar 184ms ± 2% 183ms ± 2% -0.31% (p=0.012 n=94+94) XML 249ms ± 2% 248ms ± 1% -0.26% (p=0.002 n=96+92) [Geo mean] 419ms 418ms -0.21% name old user-time/op new user-time/op delta Template 273ms ± 2% 272ms ± 2% -0.46% (p=0.000 n=93+96) Unicode 116ms ± 4% 117ms ± 4% ~ (p=0.433 n=98+98) GoTypes 977ms ± 2% 977ms ± 1% ~ (p=0.971 n=92+99) Compiler 4.56s ± 6% 4.53s ± 6% ~ (p=0.081 n=100+100) SSA 11.1s ± 2% 11.1s ± 2% ~ (p=0.064 n=99+96) Flate 167ms ± 2% 167ms ± 1% -0.24% (p=0.004 n=95+96) GoParser 203ms ± 1% 203ms ± 2% -0.14% (p=0.049 n=96+97) Reflect 595ms ± 2% 595ms ± 2% ~ (p=0.544 n=95+92) Tar 225ms ± 2% 224ms ± 2% ~ (p=0.562 n=99+99) XML 312ms ± 2% 311ms ± 1% ~ (p=0.050 n=97+93) [Geo mean] 543ms 542ms -0.13% Change-Id: I8d34ab59f154b28f20c6f9e416b976bfce339baa Reviewed-on: https://go-review.googlesource.com/c/go/+/216220 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-21 20:53:30 -08:00
func rewriteValueRISCV64_OpZeroExt8to64(v *Value) bool {
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
v_0 := v.Args[0]
b := v.Block
// match: (ZeroExt8to64 <t> x)
// result: (SRLI [56] (SLLI <t> [56] x))
for {
t := v.Type
cmd/compile: reduce bounds checks in generated rewrite rules CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code. However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds for (including with all outstanding prove CLs). Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args. This change reduces both sets of bounds checks. Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement. And we now load all top level v.Args into local variables at the beginning of every rewrite rule function. The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code. This does increase register pressure, but the reduced bounds checks more than compensate. Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules. There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice. Passes toolstash-check -all. 
name old time/op new time/op delta Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96) Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90) GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94) Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100) SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99) Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96) GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93) Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94) Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95) XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94) [Geo mean] 424ms 421ms -0.68% name old user-time/op new user-time/op delta Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98) Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90) GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93) Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100) SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97) Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92) GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96) Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92) Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98) XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95) [Geo mean] 547ms 544ms -0.61% file before after Δ % compile 21113112 21109016 -4096 -0.019% total 131704940 131700844 -4096 -0.003% Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956 Reviewed-on: https://go-review.googlesource.com/c/go/+/216218 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
x := v_0
v.reset(OpRISCV64SRLI)
v.AuxInt = 56
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
v0.AuxInt = 56
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
// rewriteBlockRISCV64 applies the RISCV64 block rewrite rules to b. It reports
// true when a rule fired and b was reset, and false when no rule matched.
func rewriteBlockRISCV64(b *Block) bool {
	switch b.Kind {
	case BlockRISCV64BNE:
		// match: (BNE (SNEZ x) yes no)
		// result: (BNE x yes no)
		if ctrl := b.Controls[0]; ctrl.Op == OpRISCV64SNEZ {
			// Branch directly on the operand of SNEZ instead of on its result.
			operand := ctrl.Args[0]
			b.resetWithControl(BlockRISCV64BNE, operand)
			return true
		}
	case BlockIf:
		// match: (If cond yes no)
		// result: (BNE cond yes no)
		// Unconditional: every generic If block becomes a BNE on its control.
		ctrl := b.Controls[0]
		b.resetWithControl(BlockRISCV64BNE, ctrl)
		return true
	}
	return false
}