mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
ded9a1b372
commit
e244a7a7d3
7 changed files with 1755 additions and 150 deletions
|
|
@ -139,6 +139,7 @@ func init() {
|
|||
gp1flags = regInfo{inputs: []regMask{gpg}}
|
||||
gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
|
||||
gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
|
||||
gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
|
||||
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
|
||||
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
|
||||
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
|
||||
|
|
@ -231,14 +232,16 @@ func init() {
|
|||
{name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2)
|
||||
|
||||
// shifts
|
||||
{name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64
|
||||
{name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt
|
||||
{name: "SRL", argLength: 2, reg: gp21, asm: "LSR"}, // arg0 >> arg1, unsigned, shift amount is mod 64
|
||||
{name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned
|
||||
{name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64
|
||||
{name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed
|
||||
{name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits
|
||||
{name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
|
||||
{name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64
|
||||
{name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt
|
||||
{name: "SRL", argLength: 2, reg: gp21, asm: "LSR"}, // arg0 >> arg1, unsigned, shift amount is mod 64
|
||||
{name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned
|
||||
{name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64
|
||||
{name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed
|
||||
{name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits
|
||||
{name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
|
||||
{name: "EXTRconst", argLength: 2, reg: gp21, asm: "EXTR", aux: "Int64"}, // extract 64 bits from arg0:arg1 starting at lsb auxInt
|
||||
{name: "EXTRWconst", argLength: 2, reg: gp21, asm: "EXTRW", aux: "Int64"}, // extract 32 bits from arg0[31:0]:arg1[31:0] starting at lsb auxInt and zero top 32 bits
|
||||
|
||||
// comparisons
|
||||
{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1
|
||||
|
|
@ -281,6 +284,21 @@ func init() {
|
|||
{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
|
||||
{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
|
||||
|
||||
// bitfield ops
|
||||
// for all bitfield ops lsb is auxInt>>8, width is auxInt&0xff
|
||||
// insert low width bits of arg1 into the result starting at bit lsb, copy other bits from arg0
|
||||
{name: "BFI", argLength: 2, reg: gp21nog, asm: "BFI", aux: "Int64", resultInArg0: true},
|
||||
// extract width bits of arg1 starting at bit lsb and insert at low end of result, copy other bits from arg0
|
||||
{name: "BFXIL", argLength: 2, reg: gp21nog, asm: "BFXIL", aux: "Int64", resultInArg0: true},
|
||||
// insert low width bits of arg0 into the result starting at bit lsb, bits to the left of the inserted bit field are set to the high/sign bit of the inserted bit field, bits to the right are zeroed
|
||||
{name: "SBFIZ", argLength: 1, reg: gp11, asm: "SBFIZ", aux: "Int64"},
|
||||
// extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are set to the high/sign bit of the extracted bitfield
|
||||
{name: "SBFX", argLength: 1, reg: gp11, asm: "SBFX", aux: "Int64"},
|
||||
// insert low width bits of arg0 into the result starting at bit lsb, bits to the left and right of the inserted bit field are zeroed
|
||||
{name: "UBFIZ", argLength: 1, reg: gp11, asm: "UBFIZ", aux: "Int64"},
|
||||
// extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are zeroed
|
||||
{name: "UBFX", argLength: 1, reg: gp11, asm: "UBFX", aux: "Int64"},
|
||||
|
||||
// moves
|
||||
{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true}, // 32 low bits of auxint
|
||||
{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue