go/src/cmd/compile/internal/ssa/gen/generic.rules

1787 lines
87 KiB
Text
Raw Normal View History

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Simplifications that apply to all backend architectures. As an example, this
// Go source code
//
// y := 0 * x
//
// can be translated into y := 0 without losing any information, which saves a
// pointless multiplication instruction. Other .rules files in this directory
// (for example AMD64.rules) contain rules specific to the architecture in the
// filename. The rules here apply to every architecture.
//
// The code for parsing this file lives in rulegen.go; this file generates
// ssa/rewritegeneric.go.
// values are specified using the following format:
// (op <type> [auxint] {aux} arg0 arg1 ...)
// the type, aux, and auxint fields are optional
// on the matching side
// - the type, aux, and auxint fields must match if they are specified.
// - the first occurrence of a variable defines that variable. Subsequent
// uses must match (be == to) the first use.
// - v is defined to be the value matched.
// - an additional conditional can be provided after the match pattern with "&&".
// on the generated side
// - the type of the top-level expression is the same as the one on the left-hand side.
// - the type of any subexpressions must be specified explicitly (or
// be specified in the op's type field).
// - auxint will be 0 if not specified.
// - aux will be nil if not specified.
// blocks are specified using the following format:
// (kind controlvalue succ0 succ1 ...)
// controlvalue must be "nil" or a value expression
// succ* fields must be variables
// For now, the generated successors must be a permutation of the matched successors.
// constant folding
(Trunc16to8 (Const16 [c])) -> (Const8 [int64(int8(c))])
(Trunc32to8 (Const32 [c])) -> (Const8 [int64(int8(c))])
(Trunc32to16 (Const32 [c])) -> (Const16 [int64(int16(c))])
(Trunc64to8 (Const64 [c])) -> (Const8 [int64(int8(c))])
(Trunc64to16 (Const64 [c])) -> (Const16 [int64(int16(c))])
(Trunc64to32 (Const64 [c])) -> (Const32 [int64(int32(c))])
(Cvt64Fto32F (Const64F [c])) -> (Const32F [auxFrom32F(float32(auxTo64F(c)))])
(Cvt32Fto64F (Const32F [c])) -> (Const64F [c]) // c is already a 64 bit float
(Cvt32to32F (Const32 [c])) -> (Const32F [auxFrom32F(float32(int32(c)))])
(Cvt32to64F (Const32 [c])) -> (Const64F [auxFrom64F(float64(int32(c)))])
(Cvt64to32F (Const64 [c])) -> (Const32F [auxFrom32F(float32(c))])
(Cvt64to64F (Const64 [c])) -> (Const64F [auxFrom64F(float64(c))])
(Cvt32Fto32 (Const32F [c])) -> (Const32 [int64(int32(auxTo32F(c)))])
(Cvt32Fto64 (Const32F [c])) -> (Const64 [int64(auxTo32F(c))])
(Cvt64Fto32 (Const64F [c])) -> (Const32 [int64(int32(auxTo64F(c)))])
(Cvt64Fto64 (Const64F [c])) -> (Const64 [int64(auxTo64F(c))])
(Round32F x:(Const32F)) -> x
(Round64F x:(Const64F)) -> x
(Trunc16to8 (ZeroExt8to16 x)) -> x
(Trunc32to8 (ZeroExt8to32 x)) -> x
(Trunc32to16 (ZeroExt8to32 x)) -> (ZeroExt8to16 x)
(Trunc32to16 (ZeroExt16to32 x)) -> x
(Trunc64to8 (ZeroExt8to64 x)) -> x
(Trunc64to16 (ZeroExt8to64 x)) -> (ZeroExt8to16 x)
(Trunc64to16 (ZeroExt16to64 x)) -> x
(Trunc64to32 (ZeroExt8to64 x)) -> (ZeroExt8to32 x)
(Trunc64to32 (ZeroExt16to64 x)) -> (ZeroExt16to32 x)
(Trunc64to32 (ZeroExt32to64 x)) -> x
(Trunc16to8 (SignExt8to16 x)) -> x
(Trunc32to8 (SignExt8to32 x)) -> x
(Trunc32to16 (SignExt8to32 x)) -> (SignExt8to16 x)
(Trunc32to16 (SignExt16to32 x)) -> x
(Trunc64to8 (SignExt8to64 x)) -> x
(Trunc64to16 (SignExt8to64 x)) -> (SignExt8to16 x)
(Trunc64to16 (SignExt16to64 x)) -> x
(Trunc64to32 (SignExt8to64 x)) -> (SignExt8to32 x)
(Trunc64to32 (SignExt16to64 x)) -> (SignExt16to32 x)
(Trunc64to32 (SignExt32to64 x)) -> x
(ZeroExt8to16 (Const8 [c])) -> (Const16 [int64( uint8(c))])
(ZeroExt8to32 (Const8 [c])) -> (Const32 [int64( uint8(c))])
(ZeroExt8to64 (Const8 [c])) -> (Const64 [int64( uint8(c))])
(ZeroExt16to32 (Const16 [c])) -> (Const32 [int64(uint16(c))])
(ZeroExt16to64 (Const16 [c])) -> (Const64 [int64(uint16(c))])
(ZeroExt32to64 (Const32 [c])) -> (Const64 [int64(uint32(c))])
(SignExt8to16 (Const8 [c])) -> (Const16 [int64( int8(c))])
(SignExt8to32 (Const8 [c])) -> (Const32 [int64( int8(c))])
(SignExt8to64 (Const8 [c])) -> (Const64 [int64( int8(c))])
(SignExt16to32 (Const16 [c])) -> (Const32 [int64( int16(c))])
(SignExt16to64 (Const16 [c])) -> (Const64 [int64( int16(c))])
(SignExt32to64 (Const32 [c])) -> (Const64 [int64( int32(c))])
(Neg8 (Const8 [c])) -> (Const8 [int64( -int8(c))])
(Neg16 (Const16 [c])) -> (Const16 [int64(-int16(c))])
(Neg32 (Const32 [c])) -> (Const32 [int64(-int32(c))])
(Neg64 (Const64 [c])) -> (Const64 [-c])
(Neg32F (Const32F [c])) && auxTo32F(c) != 0 -> (Const32F [auxFrom32F(-auxTo32F(c))])
(Neg64F (Const64F [c])) && auxTo64F(c) != 0 -> (Const64F [auxFrom64F(-auxTo64F(c))])
(Add8 (Const8 [c]) (Const8 [d])) -> (Const8 [int64(int8(c+d))])
(Add16 (Const16 [c]) (Const16 [d])) -> (Const16 [int64(int16(c+d))])
(Add32 (Const32 [c]) (Const32 [d])) -> (Const32 [int64(int32(c+d))])
(Add64 (Const64 [c]) (Const64 [d])) -> (Const64 [c+d])
(Add32F (Const32F [c]) (Const32F [d])) -> (Const32F [auxFrom32F(auxTo32F(c) + auxTo32F(d))])
(Add64F (Const64F [c]) (Const64F [d])) -> (Const64F [auxFrom64F(auxTo64F(c) + auxTo64F(d))])
(AddPtr <t> x (Const64 [c])) -> (OffPtr <t> x [c])
(AddPtr <t> x (Const32 [c])) -> (OffPtr <t> x [c])
(Sub8 (Const8 [c]) (Const8 [d])) -> (Const8 [int64(int8(c-d))])
(Sub16 (Const16 [c]) (Const16 [d])) -> (Const16 [int64(int16(c-d))])
(Sub32 (Const32 [c]) (Const32 [d])) -> (Const32 [int64(int32(c-d))])
(Sub64 (Const64 [c]) (Const64 [d])) -> (Const64 [c-d])
(Sub32F (Const32F [c]) (Const32F [d])) -> (Const32F [auxFrom32F(auxTo32F(c) - auxTo32F(d))])
(Sub64F (Const64F [c]) (Const64F [d])) -> (Const64F [auxFrom64F(auxTo64F(c) - auxTo64F(d))])
(Mul8 (Const8 [c]) (Const8 [d])) -> (Const8 [int64(int8(c*d))])
(Mul16 (Const16 [c]) (Const16 [d])) -> (Const16 [int64(int16(c*d))])
(Mul32 (Const32 [c]) (Const32 [d])) -> (Const32 [int64(int32(c*d))])
(Mul64 (Const64 [c]) (Const64 [d])) -> (Const64 [c*d])
(Mul32F (Const32F [c]) (Const32F [d])) -> (Const32F [auxFrom32F(auxTo32F(c) * auxTo32F(d))])
(Mul64F (Const64F [c]) (Const64F [d])) -> (Const64F [auxFrom64F(auxTo64F(c) * auxTo64F(d))])
cmd/compile/ssa: more aggressive constant folding Add rewrite rules that canonicalize the location of constants in expressions, and fold conststants that appear in operations that can be trivially reassociated. After this change, the compiler constant-folds expressions like "4 + x - 1" and "4 & x & 1" Benchmarks affected on darwin/amd64: name old time/op new time/op delta FmtFprintfInt-8 82.1ns ± 1% 81.7ns ± 1% -0.46% (p=0.023 n=8+9) FmtFprintfIntInt-8 122ns ± 2% 120ns ± 2% -1.48% (p=0.047 n=10+10) FmtManyArgs-8 493ns ± 0% 486ns ± 1% -1.37% (p=0.000 n=8+10) Gzip-8 230ms ± 0% 229ms ± 1% -0.46% (p=0.001 n=10+9) HTTPClientServer-8 74.5µs ± 1% 73.7µs ± 1% -1.11% (p=0.000 n=10+10) JSONDecode-8 51.7ms ± 0% 51.9ms ± 1% +0.42% (p=0.017 n=10+9) RegexpMatchEasy0_32-8 82.6ns ± 1% 81.7ns ± 0% -1.02% (p=0.000 n=9+8) RegexpMatchMedium_32-8 121ns ± 1% 120ns ± 1% -1.48% (p=0.001 n=10+10) Revcomp-8 426ms ± 1% 400ms ± 1% -6.16% (p=0.000 n=10+10) TimeFormat-8 330ns ± 1% 327ns ± 0% -0.82% (p=0.000 n=10+10) name old speed new speed delta Gzip-8 84.4MB/s ± 0% 84.8MB/s ± 1% +0.47% (p=0.001 n=10+9) JSONDecode-8 37.6MB/s ± 0% 37.4MB/s ± 1% -0.42% (p=0.016 n=10+9) RegexpMatchEasy0_32-8 387MB/s ± 1% 392MB/s ± 0% +1.06% (p=0.000 n=9+8) RegexpMatchMedium_32-8 8.21MB/s ± 1% 8.34MB/s ± 1% +1.58% (p=0.000 n=10+9) Revcomp-8 597MB/s ± 1% 636MB/s ± 1% +6.57% (p=0.000 n=10+10) Change-Id: Ie37ff91605b76a984a8400dfd1e34f50bf61c864 Reviewed-on: https://go-review.googlesource.com/37290 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-20 08:43:54 -08:00
(And8 (Const8 [c]) (Const8 [d])) -> (Const8 [int64(int8(c&d))])
(And16 (Const16 [c]) (Const16 [d])) -> (Const16 [int64(int16(c&d))])
(And32 (Const32 [c]) (Const32 [d])) -> (Const32 [int64(int32(c&d))])
(And64 (Const64 [c]) (Const64 [d])) -> (Const64 [c&d])
(Or8 (Const8 [c]) (Const8 [d])) -> (Const8 [int64(int8(c|d))])
(Or16 (Const16 [c]) (Const16 [d])) -> (Const16 [int64(int16(c|d))])
(Or32 (Const32 [c]) (Const32 [d])) -> (Const32 [int64(int32(c|d))])
(Or64 (Const64 [c]) (Const64 [d])) -> (Const64 [c|d])
(Xor8 (Const8 [c]) (Const8 [d])) -> (Const8 [int64(int8(c^d))])
(Xor16 (Const16 [c]) (Const16 [d])) -> (Const16 [int64(int16(c^d))])
(Xor32 (Const32 [c]) (Const32 [d])) -> (Const32 [int64(int32(c^d))])
(Xor64 (Const64 [c]) (Const64 [d])) -> (Const64 [c^d])
(Div8 (Const8 [c]) (Const8 [d])) && d != 0 -> (Const8 [int64(int8(c)/int8(d))])
(Div16 (Const16 [c]) (Const16 [d])) && d != 0 -> (Const16 [int64(int16(c)/int16(d))])
(Div32 (Const32 [c]) (Const32 [d])) && d != 0 -> (Const32 [int64(int32(c)/int32(d))])
(Div64 (Const64 [c]) (Const64 [d])) && d != 0 -> (Const64 [c/d])
(Div8u (Const8 [c]) (Const8 [d])) && d != 0 -> (Const8 [int64(int8(uint8(c)/uint8(d)))])
(Div16u (Const16 [c]) (Const16 [d])) && d != 0 -> (Const16 [int64(int16(uint16(c)/uint16(d)))])
(Div32u (Const32 [c]) (Const32 [d])) && d != 0 -> (Const32 [int64(int32(uint32(c)/uint32(d)))])
(Div64u (Const64 [c]) (Const64 [d])) && d != 0 -> (Const64 [int64(uint64(c)/uint64(d))])
(Div32F (Const32F [c]) (Const32F [d])) -> (Const32F [auxFrom32F(auxTo32F(c) / auxTo32F(d))])
(Div64F (Const64F [c]) (Const64F [d])) -> (Const64F [auxFrom64F(auxTo64F(c) / auxTo64F(d))])
(Select0 (Div128u (Const64 [0]) lo y)) -> (Div64u lo y)
(Select1 (Div128u (Const64 [0]) lo y)) -> (Mod64u lo y)
(Not (ConstBool [c])) -> (ConstBool [1-c])
// Convert x * 1 to x.
(Mul(8|16|32|64) (Const(8|16|32|64) [1]) x) -> x
// Convert x * -1 to -x.
(Mul(8|16|32|64) (Const(8|16|32|64) [-1]) x) -> (Neg(8|16|32|64) x)
// Convert multiplication by a power of two to a shift.
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Mul8 <t> n (Const8 [c])) && isPowerOfTwo(c) -> (Lsh8x64 <t> n (Const64 <typ.UInt64> [log2(c)]))
(Mul16 <t> n (Const16 [c])) && isPowerOfTwo(c) -> (Lsh16x64 <t> n (Const64 <typ.UInt64> [log2(c)]))
(Mul32 <t> n (Const32 [c])) && isPowerOfTwo(c) -> (Lsh32x64 <t> n (Const64 <typ.UInt64> [log2(c)]))
(Mul64 <t> n (Const64 [c])) && isPowerOfTwo(c) -> (Lsh64x64 <t> n (Const64 <typ.UInt64> [log2(c)]))
(Mul8 <t> n (Const8 [c])) && t.IsSigned() && isPowerOfTwo(-c) -> (Neg8 (Lsh8x64 <t> n (Const64 <typ.UInt64> [log2(-c)])))
(Mul16 <t> n (Const16 [c])) && t.IsSigned() && isPowerOfTwo(-c) -> (Neg16 (Lsh16x64 <t> n (Const64 <typ.UInt64> [log2(-c)])))
(Mul32 <t> n (Const32 [c])) && t.IsSigned() && isPowerOfTwo(-c) -> (Neg32 (Lsh32x64 <t> n (Const64 <typ.UInt64> [log2(-c)])))
(Mul64 <t> n (Const64 [c])) && t.IsSigned() && isPowerOfTwo(-c) -> (Neg64 (Lsh64x64 <t> n (Const64 <typ.UInt64> [log2(-c)])))
(Mod8 (Const8 [c]) (Const8 [d])) && d != 0 -> (Const8 [int64(int8(c % d))])
(Mod16 (Const16 [c]) (Const16 [d])) && d != 0 -> (Const16 [int64(int16(c % d))])
(Mod32 (Const32 [c]) (Const32 [d])) && d != 0 -> (Const32 [int64(int32(c % d))])
(Mod64 (Const64 [c]) (Const64 [d])) && d != 0 -> (Const64 [c % d])
(Mod8u (Const8 [c]) (Const8 [d])) && d != 0 -> (Const8 [int64(uint8(c) % uint8(d))])
(Mod16u (Const16 [c]) (Const16 [d])) && d != 0 -> (Const16 [int64(uint16(c) % uint16(d))])
(Mod32u (Const32 [c]) (Const32 [d])) && d != 0 -> (Const32 [int64(uint32(c) % uint32(d))])
(Mod64u (Const64 [c]) (Const64 [d])) && d != 0 -> (Const64 [int64(uint64(c) % uint64(d))])
(Lsh64x64 (Const64 [c]) (Const64 [d])) -> (Const64 [c << uint64(d)])
(Rsh64x64 (Const64 [c]) (Const64 [d])) -> (Const64 [c >> uint64(d)])
(Rsh64Ux64 (Const64 [c]) (Const64 [d])) -> (Const64 [int64(uint64(c) >> uint64(d))])
(Lsh32x64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(c) << uint64(d))])
(Rsh32x64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(c) >> uint64(d))])
(Rsh32Ux64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(uint32(c) >> uint64(d)))])
(Lsh16x64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(c) << uint64(d))])
(Rsh16x64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(c) >> uint64(d))])
(Rsh16Ux64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(uint16(c) >> uint64(d)))])
(Lsh8x64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(c) << uint64(d))])
(Rsh8x64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(c) >> uint64(d))])
(Rsh8Ux64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(uint8(c) >> uint64(d)))])
// Fold IsInBounds when the range of the index cannot exceed the limit.
(IsInBounds (ZeroExt8to32 _) (Const32 [c])) && (1 << 8) <= c -> (ConstBool [1])
(IsInBounds (ZeroExt8to64 _) (Const64 [c])) && (1 << 8) <= c -> (ConstBool [1])
(IsInBounds (ZeroExt16to32 _) (Const32 [c])) && (1 << 16) <= c -> (ConstBool [1])
(IsInBounds (ZeroExt16to64 _) (Const64 [c])) && (1 << 16) <= c -> (ConstBool [1])
(IsInBounds x x) -> (ConstBool [0])
(IsInBounds (And8 (Const8 [c]) _) (Const8 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (ZeroExt8to16 (And8 (Const8 [c]) _)) (Const16 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (ZeroExt8to32 (And8 (Const8 [c]) _)) (Const32 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (ZeroExt8to64 (And8 (Const8 [c]) _)) (Const64 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (And16 (Const16 [c]) _) (Const16 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (ZeroExt16to32 (And16 (Const16 [c]) _)) (Const32 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (ZeroExt16to64 (And16 (Const16 [c]) _)) (Const64 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (And32 (Const32 [c]) _) (Const32 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (ZeroExt32to64 (And32 (Const32 [c]) _)) (Const64 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (And64 (Const64 [c]) _) (Const64 [d])) && 0 <= c && c < d -> (ConstBool [1])
(IsInBounds (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(0 <= c && c < d)])
(IsInBounds (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(0 <= c && c < d)])
// (Mod64u x y) is always between 0 (inclusive) and y (exclusive).
(IsInBounds (Mod32u _ y) y) -> (ConstBool [1])
(IsInBounds (Mod64u _ y) y) -> (ConstBool [1])
// Right shifting an unsigned number limits its value.
(IsInBounds (ZeroExt8to64 (Rsh8Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d -> (ConstBool [1])
(IsInBounds (ZeroExt8to32 (Rsh8Ux64 _ (Const64 [c]))) (Const32 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d -> (ConstBool [1])
(IsInBounds (ZeroExt8to16 (Rsh8Ux64 _ (Const64 [c]))) (Const16 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d -> (ConstBool [1])
(IsInBounds (Rsh8Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d -> (ConstBool [1])
(IsInBounds (ZeroExt16to64 (Rsh16Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d -> (ConstBool [1])
(IsInBounds (ZeroExt16to32 (Rsh16Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d -> (ConstBool [1])
(IsInBounds (Rsh16Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d -> (ConstBool [1])
(IsInBounds (ZeroExt32to64 (Rsh32Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 32 && 1<<uint(32-c)-1 < d -> (ConstBool [1])
(IsInBounds (Rsh32Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 32 && 1<<uint(32-c)-1 < d -> (ConstBool [1])
(IsInBounds (Rsh64Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 64 && 1<<uint(64-c)-1 < d -> (ConstBool [1])
(IsSliceInBounds x x) -> (ConstBool [1])
(IsSliceInBounds (And32 (Const32 [c]) _) (Const32 [d])) && 0 <= c && c <= d -> (ConstBool [1])
(IsSliceInBounds (And64 (Const64 [c]) _) (Const64 [d])) && 0 <= c && c <= d -> (ConstBool [1])
(IsSliceInBounds (Const32 [0]) _) -> (ConstBool [1])
(IsSliceInBounds (Const64 [0]) _) -> (ConstBool [1])
(IsSliceInBounds (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(0 <= c && c <= d)])
(IsSliceInBounds (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(0 <= c && c <= d)])
(IsSliceInBounds (SliceLen x) (SliceCap x)) -> (ConstBool [1])
(Eq(64|32|16|8) x x) -> (ConstBool [1])
(EqB (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i(c == d)])
(EqB (ConstBool [0]) x) -> (Not x)
(EqB (ConstBool [1]) x) -> x
(Neq(64|32|16|8) x x) -> (ConstBool [0])
(NeqB (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i(c != d)])
(NeqB (ConstBool [0]) x) -> x
(NeqB (ConstBool [1]) x) -> (Not x)
(NeqB (Not x) (Not y)) -> (NeqB x y)
(Eq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) -> (Eq64 (Const64 <t> [c-d]) x)
(Eq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) -> (Eq32 (Const32 <t> [int64(int32(c-d))]) x)
(Eq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Eq16 (Const16 <t> [int64(int16(c-d))]) x)
(Eq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) -> (Eq8 (Const8 <t> [int64(int8(c-d))]) x)
(Neq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) -> (Neq64 (Const64 <t> [c-d]) x)
(Neq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) -> (Neq32 (Const32 <t> [int64(int32(c-d))]) x)
(Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
(Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) -> (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
cmd/compile: automatically handle commuting ops in rewrite rules Note that this is a redo of an undo of the original buggy CL 38666. We have lots of rewrite rules that vary only in the fact that we have 2 versions for the 2 different orderings of various commuting ops. For example: (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x) (ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x) It can get unwieldly quickly, especially when there is more than one commuting op in a rule. Our existing "fix" for this problem is to have rules that canonicalize the operations first. For example: (Eq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Eq64 (Const64 <t> [c]) x) Subsequent rules can then assume if there is a constant arg to Eq64, it will be the first one. This fix kinda works, but it is fragile and only works when we remember to include the required extra rules. The fundamental problem is that the rule matcher doesn't know anything about commuting ops. This CL fixes that fact. We already have information about which ops commute. (The register allocator takes advantage of commutivity.) The rule generator now automatically generates multiple rules for a single source rule when there are commutative ops in the rule. We can now drop all of our almost-duplicate source-level rules and the canonicalization rules. I have some CLs in progress that will be a lot less verbose when the rule generator handles commutivity for me. I had to reorganize the load-combining rules a bit. The 8-way OR rules generated 128 different reorderings, which was causing the generator to put too much code in the rewrite*.go files (the big ones were going from 25K lines to 132K lines). Instead I reorganized the rules to combine pairs of loads at a time. The generated rule files are now actually a bit (5%) smaller. Make.bash times are ~unchanged. Compiler benchmarks are not observably different. Probably because we don't spend much compiler time in rule matching anyway. I've also done a pass over all of our ops adding commutative markings for ops which hadn't had them previously. Fixes #18292 Change-Id: Ic1c0e43fbf579539f459971625f69690c9ab8805 Reviewed-on: https://go-review.googlesource.com/38801 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2017-03-30 03:30:22 +00:00
// Canonicalize x-const to x+(-const)
(Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [-c]) x)
(Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [int64(int32(-c))]) x)
(Sub16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Add16 (Const16 <t> [int64(int16(-c))]) x)
(Sub8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Add8 (Const8 <t> [int64(int8(-c))]) x)
// fold negation into comparison operators
(Not (Eq(64|32|16|8|B) x y)) -> (Neq(64|32|16|8|B) x y)
(Not (Neq(64|32|16|8|B) x y)) -> (Eq(64|32|16|8|B) x y)
(Not (Greater(64|32|16|8) x y)) -> (Leq(64|32|16|8) x y)
(Not (Greater(64|32|16|8)U x y)) -> (Leq(64|32|16|8)U x y)
(Not (Geq(64|32|16|8) x y)) -> (Less(64|32|16|8) x y)
(Not (Geq(64|32|16|8)U x y)) -> (Less(64|32|16|8)U x y)
(Not (Less(64|32|16|8) x y)) -> (Geq(64|32|16|8) x y)
(Not (Less(64|32|16|8)U x y)) -> (Geq(64|32|16|8)U x y)
(Not (Leq(64|32|16|8) x y)) -> (Greater(64|32|16|8) x y)
(Not (Leq(64|32|16|8)U x y)) -> (Greater(64|32|16|8)U x y)
// Distribute multiplication c * (d+x) -> c*d + c*x. Useful for:
// a[i].b = ...; a[i+1].b = ...
(Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x)) ->
(Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
(Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x)) ->
(Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
// Rewrite x*y ± x*z to x*(y±z)
(Add(64|32|16|8) <t> (Mul(64|32|16|8) x y) (Mul(64|32|16|8) x z))
-> (Mul(64|32|16|8) x (Add(64|32|16|8) <t> y z))
(Sub(64|32|16|8) <t> (Mul(64|32|16|8) x y) (Mul(64|32|16|8) x z))
-> (Mul(64|32|16|8) x (Sub(64|32|16|8) <t> y z))
// rewrite shifts of 8/16/32 bit consts into 64 bit consts to reduce
// the number of the other rewrite rules for const shifts
(Lsh64x32 <t> x (Const32 [c])) -> (Lsh64x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh64x16 <t> x (Const16 [c])) -> (Lsh64x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh64x8 <t> x (Const8 [c])) -> (Lsh64x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh64x32 <t> x (Const32 [c])) -> (Rsh64x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh64x16 <t> x (Const16 [c])) -> (Rsh64x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh64x8 <t> x (Const8 [c])) -> (Rsh64x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh64Ux32 <t> x (Const32 [c])) -> (Rsh64Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh64Ux16 <t> x (Const16 [c])) -> (Rsh64Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh64Ux8 <t> x (Const8 [c])) -> (Rsh64Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh32x32 <t> x (Const32 [c])) -> (Lsh32x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh32x16 <t> x (Const16 [c])) -> (Lsh32x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh32x8 <t> x (Const8 [c])) -> (Lsh32x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh32x32 <t> x (Const32 [c])) -> (Rsh32x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh32x16 <t> x (Const16 [c])) -> (Rsh32x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh32x8 <t> x (Const8 [c])) -> (Rsh32x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh32Ux32 <t> x (Const32 [c])) -> (Rsh32Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh32Ux16 <t> x (Const16 [c])) -> (Rsh32Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh32Ux8 <t> x (Const8 [c])) -> (Rsh32Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh16x32 <t> x (Const32 [c])) -> (Lsh16x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh16x16 <t> x (Const16 [c])) -> (Lsh16x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh16x8 <t> x (Const8 [c])) -> (Lsh16x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh16x32 <t> x (Const32 [c])) -> (Rsh16x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh16x16 <t> x (Const16 [c])) -> (Rsh16x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh16x8 <t> x (Const8 [c])) -> (Rsh16x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh16Ux32 <t> x (Const32 [c])) -> (Rsh16Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh16Ux16 <t> x (Const16 [c])) -> (Rsh16Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh16Ux8 <t> x (Const8 [c])) -> (Rsh16Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh8x32 <t> x (Const32 [c])) -> (Lsh8x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh8x16 <t> x (Const16 [c])) -> (Lsh8x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh8x8 <t> x (Const8 [c])) -> (Lsh8x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh8x32 <t> x (Const32 [c])) -> (Rsh8x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh8x16 <t> x (Const16 [c])) -> (Rsh8x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh8x8 <t> x (Const8 [c])) -> (Rsh8x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh8Ux32 <t> x (Const32 [c])) -> (Rsh8Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh8Ux16 <t> x (Const16 [c])) -> (Rsh8Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh8Ux8 <t> x (Const8 [c])) -> (Rsh8Ux64 x (Const64 <t> [int64(uint8(c))]))
// shifts by zero
(Lsh(64|32|16|8)x64 x (Const64 [0])) -> x
(Rsh(64|32|16|8)x64 x (Const64 [0])) -> x
(Rsh(64|32|16|8)Ux64 x (Const64 [0])) -> x
// zero shifted
(Lsh64x(64|32|16|8) (Const64 [0]) _) -> (Const64 [0])
(Rsh64x(64|32|16|8) (Const64 [0]) _) -> (Const64 [0])
(Rsh64Ux(64|32|16|8) (Const64 [0]) _) -> (Const64 [0])
(Lsh32x(64|32|16|8) (Const32 [0]) _) -> (Const32 [0])
(Rsh32x(64|32|16|8) (Const32 [0]) _) -> (Const32 [0])
(Rsh32Ux(64|32|16|8) (Const32 [0]) _) -> (Const32 [0])
(Lsh16x(64|32|16|8) (Const16 [0]) _) -> (Const16 [0])
(Rsh16x(64|32|16|8) (Const16 [0]) _) -> (Const16 [0])
(Rsh16Ux(64|32|16|8) (Const16 [0]) _) -> (Const16 [0])
(Lsh8x(64|32|16|8) (Const8 [0]) _) -> (Const8 [0])
(Rsh8x(64|32|16|8) (Const8 [0]) _) -> (Const8 [0])
(Rsh8Ux(64|32|16|8) (Const8 [0]) _) -> (Const8 [0])
// large left shifts of all values, and right shifts of unsigned values
((Lsh64|Rsh64U)x64 _ (Const64 [c])) && uint64(c) >= 64 -> (Const64 [0])
((Lsh32|Rsh32U)x64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
((Lsh16|Rsh16U)x64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
((Lsh8|Rsh8U)x64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
// combine const shifts
(Lsh64x64 <t> (Lsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh64x64 x (Const64 <t> [c+d]))
(Lsh32x64 <t> (Lsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh32x64 x (Const64 <t> [c+d]))
(Lsh16x64 <t> (Lsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh16x64 x (Const64 <t> [c+d]))
(Lsh8x64 <t> (Lsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh8x64 x (Const64 <t> [c+d]))
(Rsh64x64 <t> (Rsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh64x64 x (Const64 <t> [c+d]))
(Rsh32x64 <t> (Rsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh32x64 x (Const64 <t> [c+d]))
(Rsh16x64 <t> (Rsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh16x64 x (Const64 <t> [c+d]))
(Rsh8x64 <t> (Rsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh8x64 x (Const64 <t> [c+d]))
(Rsh64Ux64 <t> (Rsh64Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh64Ux64 x (Const64 <t> [c+d]))
(Rsh32Ux64 <t> (Rsh32Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh32Ux64 x (Const64 <t> [c+d]))
(Rsh16Ux64 <t> (Rsh16Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh16Ux64 x (Const64 <t> [c+d]))
(Rsh8Ux64 <t> (Rsh8Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh8Ux64 x (Const64 <t> [c+d]))
// Remove signed right shift before an unsigned right shift that extracts the sign bit.
(Rsh8Ux64 (Rsh8x64 x _) (Const64 <t> [7] )) -> (Rsh8Ux64 x (Const64 <t> [7] ))
(Rsh16Ux64 (Rsh16x64 x _) (Const64 <t> [15])) -> (Rsh16Ux64 x (Const64 <t> [15]))
(Rsh32Ux64 (Rsh32x64 x _) (Const64 <t> [31])) -> (Rsh32Ux64 x (Const64 <t> [31]))
(Rsh64Ux64 (Rsh64x64 x _) (Const64 <t> [63])) -> (Rsh64Ux64 x (Const64 <t> [63]))
// ((x >> c1) << c2) >> c3
(Rsh(64|32|16|8)Ux64 (Lsh(64|32|16|8)x64 (Rsh(64|32|16|8)Ux64 x (Const64 [c1])) (Const64 [c2])) (Const64 [c3]))
&& uint64(c1) >= uint64(c2) && uint64(c3) >= uint64(c2) && !uaddOvf(c1-c2, c3)
-> (Rsh(64|32|16|8)Ux64 x (Const64 <typ.UInt64> [c1-c2+c3]))
// ((x << c1) >> c2) << c3
(Lsh(64|32|16|8)x64 (Rsh(64|32|16|8)Ux64 (Lsh(64|32|16|8)x64 x (Const64 [c1])) (Const64 [c2])) (Const64 [c3]))
&& uint64(c1) >= uint64(c2) && uint64(c3) >= uint64(c2) && !uaddOvf(c1-c2, c3)
-> (Lsh(64|32|16|8)x64 x (Const64 <typ.UInt64> [c1-c2+c3]))
// (x >> c) & uppermask = 0
(And64 (Const64 [m]) (Rsh64Ux64 _ (Const64 [c]))) && c >= 64-ntz(m) -> (Const64 [0])
(And32 (Const32 [m]) (Rsh32Ux64 _ (Const64 [c]))) && c >= 64-ntz(m) -> (Const32 [0])
(And16 (Const16 [m]) (Rsh16Ux64 _ (Const64 [c]))) && c >= 64-ntz(m) -> (Const16 [0])
(And8 (Const8 [m]) (Rsh8Ux64 _ (Const64 [c]))) && c >= 64-ntz(m) -> (Const8 [0])
// (x << c) & lowermask = 0
(And64 (Const64 [m]) (Lsh64x64 _ (Const64 [c]))) && c >= 64-nlz(m) -> (Const64 [0])
(And32 (Const32 [m]) (Lsh32x64 _ (Const64 [c]))) && c >= 64-nlz(m) -> (Const32 [0])
(And16 (Const16 [m]) (Lsh16x64 _ (Const64 [c]))) && c >= 64-nlz(m) -> (Const16 [0])
(And8 (Const8 [m]) (Lsh8x64 _ (Const64 [c]))) && c >= 64-nlz(m) -> (Const8 [0])
// replace shifts with zero extensions
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh16Ux64 (Lsh16x64 x (Const64 [8])) (Const64 [8])) -> (ZeroExt8to16 (Trunc16to8 <typ.UInt8> x))
(Rsh32Ux64 (Lsh32x64 x (Const64 [24])) (Const64 [24])) -> (ZeroExt8to32 (Trunc32to8 <typ.UInt8> x))
(Rsh64Ux64 (Lsh64x64 x (Const64 [56])) (Const64 [56])) -> (ZeroExt8to64 (Trunc64to8 <typ.UInt8> x))
(Rsh32Ux64 (Lsh32x64 x (Const64 [16])) (Const64 [16])) -> (ZeroExt16to32 (Trunc32to16 <typ.UInt16> x))
(Rsh64Ux64 (Lsh64x64 x (Const64 [48])) (Const64 [48])) -> (ZeroExt16to64 (Trunc64to16 <typ.UInt16> x))
(Rsh64Ux64 (Lsh64x64 x (Const64 [32])) (Const64 [32])) -> (ZeroExt32to64 (Trunc64to32 <typ.UInt32> x))
// replace shifts with sign extensions
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh16x64 (Lsh16x64 x (Const64 [8])) (Const64 [8])) -> (SignExt8to16 (Trunc16to8 <typ.Int8> x))
(Rsh32x64 (Lsh32x64 x (Const64 [24])) (Const64 [24])) -> (SignExt8to32 (Trunc32to8 <typ.Int8> x))
(Rsh64x64 (Lsh64x64 x (Const64 [56])) (Const64 [56])) -> (SignExt8to64 (Trunc64to8 <typ.Int8> x))
(Rsh32x64 (Lsh32x64 x (Const64 [16])) (Const64 [16])) -> (SignExt16to32 (Trunc32to16 <typ.Int16> x))
(Rsh64x64 (Lsh64x64 x (Const64 [48])) (Const64 [48])) -> (SignExt16to64 (Trunc64to16 <typ.Int16> x))
(Rsh64x64 (Lsh64x64 x (Const64 [32])) (Const64 [32])) -> (SignExt32to64 (Trunc64to32 <typ.Int32> x))
// constant comparisons
(Eq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) -> (ConstBool [b2i(c == d)])
(Neq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) -> (ConstBool [b2i(c != d)])
(Greater(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) -> (ConstBool [b2i(c > d)])
(Geq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) -> (ConstBool [b2i(c >= d)])
(Less(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) -> (ConstBool [b2i(c < d)])
(Leq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) -> (ConstBool [b2i(c <= d)])
(Geq8 (And8 _ (Const8 [c])) (Const8 [0])) && int8(c) >= 0 -> (ConstBool [1])
(Geq16 (And16 _ (Const16 [c])) (Const16 [0])) && int16(c) >= 0 -> (ConstBool [1])
(Geq32 (And32 _ (Const32 [c])) (Const32 [0])) && int32(c) >= 0 -> (ConstBool [1])
(Geq64 (And64 _ (Const64 [c])) (Const64 [0])) && int64(c) >= 0 -> (ConstBool [1])
(Geq64 (Rsh64Ux64 _ (Const64 [c])) (Const64 [0])) && c > 0 -> (ConstBool [1])
(Greater64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) > uint64(d))])
(Greater32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) > uint32(d))])
(Greater16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) > uint16(d))])
(Greater8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) > uint8(d))])
(Geq64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) >= uint64(d))])
(Geq32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) >= uint32(d))])
(Geq16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) >= uint16(d))])
(Geq8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) >= uint8(d))])
(Less64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) < uint64(d))])
(Less32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) < uint32(d))])
(Less16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) < uint16(d))])
(Less8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) < uint8(d))])
(Leq64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) <= uint64(d))])
(Leq32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) <= uint32(d))])
(Leq16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) <= uint16(d))])
(Leq8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) <= uint8(d))])
// constant floating point comparisons
(Eq32F (Const32F [c]) (Const32F [d])) -> (ConstBool [b2i(auxTo32F(c) == auxTo32F(d))])
(Eq64F (Const64F [c]) (Const64F [d])) -> (ConstBool [b2i(auxTo64F(c) == auxTo64F(d))])
(Neq32F (Const32F [c]) (Const32F [d])) -> (ConstBool [b2i(auxTo32F(c) != auxTo32F(d))])
(Neq64F (Const64F [c]) (Const64F [d])) -> (ConstBool [b2i(auxTo64F(c) != auxTo64F(d))])
(Greater32F (Const32F [c]) (Const32F [d])) -> (ConstBool [b2i(auxTo32F(c) > auxTo32F(d))])
(Greater64F (Const64F [c]) (Const64F [d])) -> (ConstBool [b2i(auxTo64F(c) > auxTo64F(d))])
(Geq32F (Const32F [c]) (Const32F [d])) -> (ConstBool [b2i(auxTo32F(c) >= auxTo32F(d))])
(Geq64F (Const64F [c]) (Const64F [d])) -> (ConstBool [b2i(auxTo64F(c) >= auxTo64F(d))])
(Less32F (Const32F [c]) (Const32F [d])) -> (ConstBool [b2i(auxTo32F(c) < auxTo32F(d))])
(Less64F (Const64F [c]) (Const64F [d])) -> (ConstBool [b2i(auxTo64F(c) < auxTo64F(d))])
(Leq32F (Const32F [c]) (Const32F [d])) -> (ConstBool [b2i(auxTo32F(c) <= auxTo32F(d))])
(Leq64F (Const64F [c]) (Const64F [d])) -> (ConstBool [b2i(auxTo64F(c) <= auxTo64F(d))])
// simplifications
(Or(64|32|16|8) x x) -> x
(Or(64|32|16|8) (Const(64|32|16|8) [0]) x) -> x
(Or(64|32|16|8) (Const(64|32|16|8) [-1]) _) -> (Const(64|32|16|8) [-1])
(And(64|32|16|8) x x) -> x
(And(64|32|16|8) (Const(64|32|16|8) [-1]) x) -> x
(And(64|32|16|8) (Const(64|32|16|8) [0]) _) -> (Const(64|32|16|8) [0])
(Xor(64|32|16|8) x x) -> (Const(64|32|16|8) [0])
(Xor(64|32|16|8) (Const(64|32|16|8) [0]) x) -> x
(Add(64|32|16|8) (Const(64|32|16|8) [0]) x) -> x
(Sub(64|32|16|8) x x) -> (Const(64|32|16|8) [0])
(Mul(64|32|16|8) (Const(64|32|16|8) [0]) _) -> (Const(64|32|16|8) [0])
(Com(64|32|16|8) (Com(64|32|16|8) x)) -> x
(Com(64|32|16|8) (Const(64|32|16|8) [c])) -> (Const(64|32|16|8) [^c])
(Neg(64|32|16|8) (Sub(64|32|16|8) x y)) -> (Sub(64|32|16|8) y x)
(Add(64|32|16|8) (Const(64|32|16|8) [1]) (Com(64|32|16|8) x)) -> (Neg(64|32|16|8) x)
(And(64|32|16|8) x (And(64|32|16|8) x y)) -> (And(64|32|16|8) x y)
(Or(64|32|16|8) x (Or(64|32|16|8) x y)) -> (Or(64|32|16|8) x y)
(Xor(64|32|16|8) x (Xor(64|32|16|8) x y)) -> y
// Ands clear bits. Ors set bits.
// If a subsequent Or will set all the bits
// that an And cleared, we can skip the And.
// This happens in bitmasking code like:
// x &^= 3 << shift // clear two old bits
// x |= v << shift // set two new bits
// when shift is a small constant and v ends up a constant 3.
(Or8 (And8 x (Const8 [c2])) (Const8 <t> [c1])) && ^(c1 | c2) == 0 -> (Or8 (Const8 <t> [c1]) x)
(Or16 (And16 x (Const16 [c2])) (Const16 <t> [c1])) && ^(c1 | c2) == 0 -> (Or16 (Const16 <t> [c1]) x)
(Or32 (And32 x (Const32 [c2])) (Const32 <t> [c1])) && ^(c1 | c2) == 0 -> (Or32 (Const32 <t> [c1]) x)
(Or64 (And64 x (Const64 [c2])) (Const64 <t> [c1])) && ^(c1 | c2) == 0 -> (Or64 (Const64 <t> [c1]) x)
(Trunc64to8 (And64 (Const64 [y]) x)) && y&0xFF == 0xFF -> (Trunc64to8 x)
(Trunc64to16 (And64 (Const64 [y]) x)) && y&0xFFFF == 0xFFFF -> (Trunc64to16 x)
(Trunc64to32 (And64 (Const64 [y]) x)) && y&0xFFFFFFFF == 0xFFFFFFFF -> (Trunc64to32 x)
(Trunc32to8 (And32 (Const32 [y]) x)) && y&0xFF == 0xFF -> (Trunc32to8 x)
(Trunc32to16 (And32 (Const32 [y]) x)) && y&0xFFFF == 0xFFFF -> (Trunc32to16 x)
(Trunc16to8 (And16 (Const16 [y]) x)) && y&0xFF == 0xFF -> (Trunc16to8 x)
(ZeroExt8to64 (Trunc64to8 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 56 -> x
(ZeroExt16to64 (Trunc64to16 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 48 -> x
(ZeroExt32to64 (Trunc64to32 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 32 -> x
(ZeroExt8to32 (Trunc32to8 x:(Rsh32Ux64 _ (Const64 [s])))) && s >= 24 -> x
(ZeroExt16to32 (Trunc32to16 x:(Rsh32Ux64 _ (Const64 [s])))) && s >= 16 -> x
(ZeroExt8to16 (Trunc16to8 x:(Rsh16Ux64 _ (Const64 [s])))) && s >= 8 -> x
(SignExt8to64 (Trunc64to8 x:(Rsh64x64 _ (Const64 [s])))) && s >= 56 -> x
(SignExt16to64 (Trunc64to16 x:(Rsh64x64 _ (Const64 [s])))) && s >= 48 -> x
(SignExt32to64 (Trunc64to32 x:(Rsh64x64 _ (Const64 [s])))) && s >= 32 -> x
(SignExt8to32 (Trunc32to8 x:(Rsh32x64 _ (Const64 [s])))) && s >= 24 -> x
(SignExt16to32 (Trunc32to16 x:(Rsh32x64 _ (Const64 [s])))) && s >= 16 -> x
(SignExt8to16 (Trunc16to8 x:(Rsh16x64 _ (Const64 [s])))) && s >= 8 -> x
(Slicemask (Const32 [x])) && x > 0 -> (Const32 [-1])
(Slicemask (Const32 [0])) -> (Const32 [0])
(Slicemask (Const64 [x])) && x > 0 -> (Const64 [-1])
(Slicemask (Const64 [0])) -> (Const64 [0])
// Rewrite AND of consts as shifts if possible, slightly faster for 64 bit operands
// leading zeros can be shifted left, then right
(And64 <t> (Const64 [y]) x) && nlz(y) + nto(y) == 64 && nto(y) >= 32
-> (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)]))
// trailing zeros can be shifted right, then left
(And64 <t> (Const64 [y]) x) && nlo(y) + ntz(y) == 64 && ntz(y) >= 32
-> (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
// simplifications often used for lengths. e.g. len(s[i:i+5])==5
(Sub(64|32|16|8) (Add(64|32|16|8) x y) x) -> y
(Sub(64|32|16|8) (Add(64|32|16|8) x y) y) -> x
// basic phi simplifications
(Phi (Const8 [c]) (Const8 [c])) -> (Const8 [c])
(Phi (Const16 [c]) (Const16 [c])) -> (Const16 [c])
(Phi (Const32 [c]) (Const32 [c])) -> (Const32 [c])
(Phi (Const64 [c]) (Const64 [c])) -> (Const64 [c])
// slice and interface comparisons
// The frontend ensures that we can only compare against nil,
// so we need only compare the first word (interface type or slice ptr).
(EqInter x y) -> (EqPtr (ITab x) (ITab y))
(NeqInter x y) -> (NeqPtr (ITab x) (ITab y))
(EqSlice x y) -> (EqPtr (SlicePtr x) (SlicePtr y))
(NeqSlice x y) -> (NeqPtr (SlicePtr x) (SlicePtr y))
// Load of store of same address, with compatibly typed value and same size
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Load <t1> p1 (Store {t2} p2 x _))
&& isSamePtr(p1, p2)
&& t1.Compare(x.Type) == types.CMPeq
&& t1.Size() == sizeof(t2)
-> x
(Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 x _)))
&& isSamePtr(p1, p3)
&& t1.Compare(x.Type) == types.CMPeq
&& t1.Size() == sizeof(t2)
&& disjoint(p3, sizeof(t3), p2, sizeof(t2))
-> x
(Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 x _))))
&& isSamePtr(p1, p4)
&& t1.Compare(x.Type) == types.CMPeq
&& t1.Size() == sizeof(t2)
&& disjoint(p4, sizeof(t4), p2, sizeof(t2))
&& disjoint(p4, sizeof(t4), p3, sizeof(t3))
-> x
(Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 x _)))))
&& isSamePtr(p1, p5)
&& t1.Compare(x.Type) == types.CMPeq
&& t1.Size() == sizeof(t2)
&& disjoint(p5, sizeof(t5), p2, sizeof(t2))
&& disjoint(p5, sizeof(t5), p3, sizeof(t3))
&& disjoint(p5, sizeof(t5), p4, sizeof(t4))
-> x
// Pass constants through math.Float{32,64}bits and math.Float{32,64}frombits
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Load <t1> p1 (Store {t2} p2 (Const64 [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 8 && is64BitFloat(t1) -> (Const64F [x])
(Load <t1> p1 (Store {t2} p2 (Const32 [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 4 && is32BitFloat(t1) -> (Const32F [auxFrom32F(math.Float32frombits(uint32(x)))])
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Load <t1> p1 (Store {t2} p2 (Const64F [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 8 && is64BitInt(t1) -> (Const64 [x])
(Load <t1> p1 (Store {t2} p2 (Const32F [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 4 && is32BitInt(t1) -> (Const32 [int64(int32(math.Float32bits(auxTo32F(x))))])
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
// Float Loads up to Zeros so they can be constant folded.
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
mem:(Zero [n] p3 _)))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p3)
&& fe.CanSSA(t1)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& disjoint(op, t1.Size(), p2, sizeof(t2))
-> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p3) mem)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
(Store {t3} p3 _
mem:(Zero [n] p4 _))))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p4)
&& fe.CanSSA(t1)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& disjoint(op, t1.Size(), p2, sizeof(t2))
&& disjoint(op, t1.Size(), p3, sizeof(t3))
-> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p4) mem)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
(Store {t3} p3 _
(Store {t4} p4 _
mem:(Zero [n] p5 _)))))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p5)
&& fe.CanSSA(t1)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& disjoint(op, t1.Size(), p2, sizeof(t2))
&& disjoint(op, t1.Size(), p3, sizeof(t3))
&& disjoint(op, t1.Size(), p4, sizeof(t4))
-> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p5) mem)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
(Store {t3} p3 _
(Store {t4} p4 _
(Store {t5} p5 _
mem:(Zero [n] p6 _))))))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p6)
&& fe.CanSSA(t1)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& disjoint(op, t1.Size(), p2, sizeof(t2))
&& disjoint(op, t1.Size(), p3, sizeof(t3))
&& disjoint(op, t1.Size(), p4, sizeof(t4))
&& disjoint(op, t1.Size(), p5, sizeof(t5))
-> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p6) mem)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
// Zero to Load forwarding.
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& t1.IsBoolean()
&& isSamePtr(p1, p2)
&& n >= o + 1
-> (ConstBool [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is8BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 1
-> (Const8 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is16BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 2
-> (Const16 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is32BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 4
-> (Const32 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is64BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 8
-> (Const64 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is32BitFloat(t1)
&& isSamePtr(p1, p2)
&& n >= o + 4
-> (Const32F [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is64BitFloat(t1)
&& isSamePtr(p1, p2)
&& n >= o + 8
-> (Const64F [0])
// Eliminate stores of values that have just been loaded from the same location.
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
// We also handle the common case where there are some intermediate stores.
(Store {t1} p1 (Load <t2> p2 mem) mem)
&& isSamePtr(p1, p2)
&& t2.Size() == sizeof(t1)
-> mem
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ oldmem))
&& isSamePtr(p1, p2)
&& t2.Size() == sizeof(t1)
&& disjoint(p1, sizeof(t1), p3, sizeof(t3))
-> mem
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ (Store {t4} p4 _ oldmem)))
&& isSamePtr(p1, p2)
&& t2.Size() == sizeof(t1)
&& disjoint(p1, sizeof(t1), p3, sizeof(t3))
&& disjoint(p1, sizeof(t1), p4, sizeof(t4))
-> mem
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 _ oldmem))))
&& isSamePtr(p1, p2)
&& t2.Size() == sizeof(t1)
&& disjoint(p1, sizeof(t1), p3, sizeof(t3))
&& disjoint(p1, sizeof(t1), p4, sizeof(t4))
&& disjoint(p1, sizeof(t1), p5, sizeof(t5))
-> mem
// Don't Store zeros to cleared variables.
(Store {t} (OffPtr [o] p1) x mem:(Zero [n] p2 _))
&& isConstZero(x)
&& o >= 0 && sizeof(t) + o <= n && isSamePtr(p1, p2)
-> mem
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Zero [n] p3 _)))
&& isConstZero(x)
&& o1 >= 0 && sizeof(t1) + o1 <= n && isSamePtr(p1, p3)
&& disjoint(op, sizeof(t1), p2, sizeof(t2))
-> mem
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Store {t3} p3 _ (Zero [n] p4 _))))
&& isConstZero(x)
&& o1 >= 0 && sizeof(t1) + o1 <= n && isSamePtr(p1, p4)
&& disjoint(op, sizeof(t1), p2, sizeof(t2))
&& disjoint(op, sizeof(t1), p3, sizeof(t3))
-> mem
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Zero [n] p5 _)))))
&& isConstZero(x)
&& o1 >= 0 && sizeof(t1) + o1 <= n && isSamePtr(p1, p5)
&& disjoint(op, sizeof(t1), p2, sizeof(t2))
&& disjoint(op, sizeof(t1), p3, sizeof(t3))
&& disjoint(op, sizeof(t1), p4, sizeof(t4))
-> mem
// Collapse OffPtr
(OffPtr (OffPtr p [b]) [a]) -> (OffPtr p [a+b])
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(OffPtr p [0]) && v.Type.Compare(p.Type) == types.CMPeq -> p
// indexing operations
// Note: bounds check has already been done
(PtrIndex <t> ptr idx) && config.PtrSize == 4 -> (AddPtr ptr (Mul32 <typ.Int> idx (Const32 <typ.Int> [t.Elem().Size()])))
(PtrIndex <t> ptr idx) && config.PtrSize == 8 -> (AddPtr ptr (Mul64 <typ.Int> idx (Const64 <typ.Int> [t.Elem().Size()])))
// struct operations
(StructSelect (StructMake1 x)) -> x
(StructSelect [0] (StructMake2 x _)) -> x
(StructSelect [1] (StructMake2 _ x)) -> x
(StructSelect [0] (StructMake3 x _ _)) -> x
(StructSelect [1] (StructMake3 _ x _)) -> x
(StructSelect [2] (StructMake3 _ _ x)) -> x
(StructSelect [0] (StructMake4 x _ _ _)) -> x
(StructSelect [1] (StructMake4 _ x _ _)) -> x
(StructSelect [2] (StructMake4 _ _ x _)) -> x
(StructSelect [3] (StructMake4 _ _ _ x)) -> x
(Load <t> _ _) && t.IsStruct() && t.NumFields() == 0 && fe.CanSSA(t) ->
(StructMake0)
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 1 && fe.CanSSA(t) ->
(StructMake1
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 2 && fe.CanSSA(t) ->
(StructMake2
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 3 && fe.CanSSA(t) ->
(StructMake3
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
(Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 4 && fe.CanSSA(t) ->
(StructMake4
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
(Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem)
(Load <t.FieldType(3)> (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] ptr) mem))
(StructSelect [i] x:(Load <t> ptr mem)) && !fe.CanSSA(t) ->
@x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem)
(Store _ (StructMake0) mem) -> mem
(Store dst (StructMake1 <t> f0) mem) ->
(Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)
(Store dst (StructMake2 <t> f0 f1) mem) ->
(Store {t.FieldType(1)}
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
(Store {t.FieldType(0)}
(OffPtr <t.FieldType(0).PtrTo()> [0] dst)
f0 mem))
(Store dst (StructMake3 <t> f0 f1 f2) mem) ->
(Store {t.FieldType(2)}
(OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
f2
(Store {t.FieldType(1)}
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
(Store {t.FieldType(0)}
(OffPtr <t.FieldType(0).PtrTo()> [0] dst)
f0 mem)))
(Store dst (StructMake4 <t> f0 f1 f2 f3) mem) ->
(Store {t.FieldType(3)}
(OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] dst)
f3
(Store {t.FieldType(2)}
(OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
f2
(Store {t.FieldType(1)}
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
(Store {t.FieldType(0)}
(OffPtr <t.FieldType(0).PtrTo()> [0] dst)
f0 mem))))
// Putting struct{*byte} and similar into direct interfaces.
(IMake typ (StructMake1 val)) -> (IMake typ val)
(StructSelect [0] x:(IData _)) -> x
cmd/compile: better job of naming compound types Compound AUTO types weren't named previously. That was because live variable analysis (plive.go) doesn't handle spilling to compound types. It can't handle them because there is no valid place to put VARDEFs when regalloc is spilling compound types. compound types = multiword builtin types: complex, string, slice, and interface. Instead, we split named AUTOs into individual one-word variables. For example, a string s gets split into a byte ptr s.ptr and an integer s.len. Those two variables can be spilled to / restored from independently. As a result, live variable analysis can handle them because they are one-word objects. This CL will change how AUTOs are described in DWARF information. Consider the code: func f(s string, i int) int { x := s[i:i+5] g() return lookup(x) } The old compiler would spill x to two consecutive slots on the stack, both named x (at offsets 0 and 8). The new compiler spills the pointer of x to a slot named x.ptr. It doesn't spill x.len at all, as it is a constant (5) and can be rematerialized for the call to lookup. So compound objects may not be spilled in their entirety, and even if they are they won't necessarily be contiguous. Such is the price of optimization. Re-enable live variable analysis tests. One test remains disabled, it fails because of #14904. Change-Id: I8ef2b5ab91e43a0d2136bfc231c05d100ec0b801 Reviewed-on: https://go-review.googlesource.com/21233 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2016-03-28 11:25:17 -07:00
// un-SSAable values use mem->mem copies
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Store {t} dst (Load src mem) mem) && !fe.CanSSA(t.(*types.Type)) ->
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Move {t} [sizeof(t)] dst src mem)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Store {t} dst (Load src mem) (VarDef {x} mem)) && !fe.CanSSA(t.(*types.Type)) ->
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Move {t} [sizeof(t)] dst src (VarDef {x} mem))
// array ops
(ArraySelect (ArrayMake1 x)) -> x
(Load <t> _ _) && t.IsArray() && t.NumElem() == 0 ->
(ArrayMake0)
(Load <t> ptr mem) && t.IsArray() && t.NumElem() == 1 && fe.CanSSA(t) ->
(ArrayMake1 (Load <t.Elem()> ptr mem))
(Store _ (ArrayMake0) mem) -> mem
(Store dst (ArrayMake1 e) mem) -> (Store {e.Type} dst e mem)
// Putting [1]{*byte} and similar into direct interfaces.
(IMake typ (ArrayMake1 val)) -> (IMake typ val)
(ArraySelect [0] x:(IData _)) -> x
// string ops
// Decomposing StringMake and lowering of StringPtr and StringLen
// happens in a later pass, dec, so that these operations are available
cmd/compile: better job of naming compound types Compound AUTO types weren't named previously. That was because live variable analysis (plive.go) doesn't handle spilling to compound types. It can't handle them because there is no valid place to put VARDEFs when regalloc is spilling compound types. compound types = multiword builtin types: complex, string, slice, and interface. Instead, we split named AUTOs into individual one-word variables. For example, a string s gets split into a byte ptr s.ptr and an integer s.len. Those two variables can be spilled to / restored from independently. As a result, live variable analysis can handle them because they are one-word objects. This CL will change how AUTOs are described in DWARF information. Consider the code: func f(s string, i int) int { x := s[i:i+5] g() return lookup(x) } The old compiler would spill x to two consecutive slots on the stack, both named x (at offsets 0 and 8). The new compiler spills the pointer of x to a slot named x.ptr. It doesn't spill x.len at all, as it is a constant (5) and can be rematerialized for the call to lookup. So compound objects may not be spilled in their entirety, and even if they are they won't necessarily be contiguous. Such is the price of optimization. Re-enable live variable analysis tests. One test remains disabled, it fails because of #14904. Change-Id: I8ef2b5ab91e43a0d2136bfc231c05d100ec0b801 Reviewed-on: https://go-review.googlesource.com/21233 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2016-03-28 11:25:17 -07:00
// to other passes for optimizations.
(StringPtr (StringMake (Addr <t> {s} base) _)) -> (Addr <t> {s} base)
(StringLen (StringMake _ (Const64 <t> [c]))) -> (Const64 <t> [c])
(ConstString {s}) && config.PtrSize == 4 && s.(string) == "" ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(StringMake (ConstNil) (Const32 <typ.Int> [0]))
(ConstString {s}) && config.PtrSize == 8 && s.(string) == "" ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(StringMake (ConstNil) (Const64 <typ.Int> [0]))
(ConstString {s}) && config.PtrSize == 4 && s.(string) != "" ->
(StringMake
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Addr <typ.BytePtr> {fe.StringData(s.(string))}
(SB))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const32 <typ.Int> [int64(len(s.(string)))]))
(ConstString {s}) && config.PtrSize == 8 && s.(string) != "" ->
(StringMake
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Addr <typ.BytePtr> {fe.StringData(s.(string))}
(SB))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.Int> [int64(len(s.(string)))]))
// slice ops
cmd/compile: better job of naming compound types Compound AUTO types weren't named previously. That was because live variable analysis (plive.go) doesn't handle spilling to compound types. It can't handle them because there is no valid place to put VARDEFs when regalloc is spilling compound types. compound types = multiword builtin types: complex, string, slice, and interface. Instead, we split named AUTOs into individual one-word variables. For example, a string s gets split into a byte ptr s.ptr and an integer s.len. Those two variables can be spilled to / restored from independently. As a result, live variable analysis can handle them because they are one-word objects. This CL will change how AUTOs are described in DWARF information. Consider the code: func f(s string, i int) int { x := s[i:i+5] g() return lookup(x) } The old compiler would spill x to two consecutive slots on the stack, both named x (at offsets 0 and 8). The new compiler spills the pointer of x to a slot named x.ptr. It doesn't spill x.len at all, as it is a constant (5) and can be rematerialized for the call to lookup. So compound objects may not be spilled in their entirety, and even if they are they won't necessarily be contiguous. Such is the price of optimization. Re-enable live variable analysis tests. One test remains disabled, it fails because of #14904. Change-Id: I8ef2b5ab91e43a0d2136bfc231c05d100ec0b801 Reviewed-on: https://go-review.googlesource.com/21233 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2016-03-28 11:25:17 -07:00
// Only a few slice rules are provided here. See dec.rules for
// a more comprehensive set.
(SliceLen (SliceMake _ (Const64 <t> [c]) _)) -> (Const64 <t> [c])
(SliceCap (SliceMake _ _ (Const64 <t> [c]))) -> (Const64 <t> [c])
(SliceLen (SliceMake _ (Const32 <t> [c]) _)) -> (Const32 <t> [c])
(SliceCap (SliceMake _ _ (Const32 <t> [c]))) -> (Const32 <t> [c])
(SlicePtr (SliceMake (SlicePtr x) _ _)) -> (SlicePtr x)
(SliceLen (SliceMake _ (SliceLen x) _)) -> (SliceLen x)
(SliceCap (SliceMake _ _ (SliceCap x))) -> (SliceCap x)
(SliceCap (SliceMake _ _ (SliceLen x))) -> (SliceLen x)
(ConstSlice) && config.PtrSize == 4 ->
(SliceMake
(ConstNil <v.Type.Elem().PtrTo()>)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const32 <typ.Int> [0])
(Const32 <typ.Int> [0]))
(ConstSlice) && config.PtrSize == 8 ->
(SliceMake
(ConstNil <v.Type.Elem().PtrTo()>)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.Int> [0])
(Const64 <typ.Int> [0]))
// interface ops
(ConstInterface) ->
(IMake
(ConstNil <typ.Uintptr>)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(ConstNil <typ.BytePtr>))
(NilCheck (GetG mem) mem) -> mem
(If (Not cond) yes no) -> (If cond no yes)
(If (ConstBool [c]) yes no) && c == 1 -> (First nil yes no)
(If (ConstBool [c]) yes no) && c == 0 -> (First nil no yes)
// Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
(Convert (Add(64|32) (Convert ptr mem) off) mem) -> (Add(64|32) ptr off)
(Convert (Convert ptr mem) mem) -> ptr
// strength reduction of divide by a constant.
// See ../magic.go for a detailed description of these algorithms.
// Unsigned divide by power of 2. Strength reduce to a shift.
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Div8u n (Const8 [c])) && isPowerOfTwo(c&0xff) -> (Rsh8Ux64 n (Const64 <typ.UInt64> [log2(c&0xff)]))
(Div16u n (Const16 [c])) && isPowerOfTwo(c&0xffff) -> (Rsh16Ux64 n (Const64 <typ.UInt64> [log2(c&0xffff)]))
(Div32u n (Const32 [c])) && isPowerOfTwo(c&0xffffffff) -> (Rsh32Ux64 n (Const64 <typ.UInt64> [log2(c&0xffffffff)]))
(Div64u n (Const64 [c])) && isPowerOfTwo(c) -> (Rsh64Ux64 n (Const64 <typ.UInt64> [log2(c)]))
(Div64u n (Const64 [-1<<63])) -> (Rsh64Ux64 n (Const64 <typ.UInt64> [63]))
// Signed non-negative divide by power of 2.
(Div8 n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo(c&0xff) -> (Rsh8Ux64 n (Const64 <typ.UInt64> [log2(c&0xff)]))
(Div16 n (Const16 [c])) && isNonNegative(n) && isPowerOfTwo(c&0xffff) -> (Rsh16Ux64 n (Const64 <typ.UInt64> [log2(c&0xffff)]))
(Div32 n (Const32 [c])) && isNonNegative(n) && isPowerOfTwo(c&0xffffffff) -> (Rsh32Ux64 n (Const64 <typ.UInt64> [log2(c&0xffffffff)]))
(Div64 n (Const64 [c])) && isNonNegative(n) && isPowerOfTwo(c) -> (Rsh64Ux64 n (Const64 <typ.UInt64> [log2(c)]))
(Div64 n (Const64 [-1<<63])) && isNonNegative(n) -> (Const64 [0])
// Unsigned divide, not a power of 2. Strength reduce to a multiply.
// For 8-bit divides, we just do a direct 9-bit by 8-bit multiply.
(Div8u x (Const8 [c])) && umagicOK(8, c) ->
(Trunc32to8
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Mul32 <typ.UInt32>
(Const32 <typ.UInt32> [int64(1<<8+umagic(8,c).m)])
(ZeroExt8to32 x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [8+umagic(8,c).s])))
// For 16-bit divides on 64-bit machines, we do a direct 17-bit by 16-bit multiply.
(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 8 ->
(Trunc64to16
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Mul64 <typ.UInt64>
(Const64 <typ.UInt64> [int64(1<<16+umagic(16,c).m)])
(ZeroExt16to64 x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [16+umagic(16,c).s])))
// For 16-bit divides on 32-bit machines
(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 4 && umagic(16,c).m&1 == 0 ->
(Trunc32to16
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Mul32 <typ.UInt32>
(Const32 <typ.UInt32> [int64(1<<15+umagic(16,c).m/2)])
(ZeroExt16to32 x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [16+umagic(16,c).s-1])))
(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 4 && c&1 == 0 ->
(Trunc32to16
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Mul32 <typ.UInt32>
(Const32 <typ.UInt32> [int64(1<<15+(umagic(16,c).m+1)/2)])
(Rsh32Ux64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [1])))
(Const64 <typ.UInt64> [16+umagic(16,c).s-2])))
(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 4 && config.useAvg ->
(Trunc32to16
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Avg32u
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Lsh32x64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [16]))
(Mul32 <typ.UInt32>
(Const32 <typ.UInt32> [int64(umagic(16,c).m)])
(ZeroExt16to32 x)))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [16+umagic(16,c).s-1])))
// For 32-bit divides on 32-bit machines
(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 4 && umagic(32,c).m&1 == 0 && config.useHmul ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Hmul32u <typ.UInt32>
(Const32 <typ.UInt32> [int64(int32(1<<31+umagic(32,c).m/2))])
x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [umagic(32,c).s-1]))
(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 4 && c&1 == 0 && config.useHmul ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Hmul32u <typ.UInt32>
(Const32 <typ.UInt32> [int64(int32(1<<31+(umagic(32,c).m+1)/2))])
(Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1])))
(Const64 <typ.UInt64> [umagic(32,c).s-2]))
(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 4 && config.useAvg && config.useHmul ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Avg32u
x
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Hmul32u <typ.UInt32>
(Const32 <typ.UInt32> [int64(int32(umagic(32,c).m))])
x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [umagic(32,c).s-1]))
// For 32-bit divides on 64-bit machines
// We'll use a regular (non-hi) multiply for this case.
(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 8 && umagic(32,c).m&1 == 0 ->
(Trunc64to32
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Mul64 <typ.UInt64>
(Const64 <typ.UInt64> [int64(1<<31+umagic(32,c).m/2)])
(ZeroExt32to64 x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [32+umagic(32,c).s-1])))
(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 8 && c&1 == 0 ->
(Trunc64to32
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Mul64 <typ.UInt64>
(Const64 <typ.UInt64> [int64(1<<31+(umagic(32,c).m+1)/2)])
(Rsh64Ux64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [1])))
(Const64 <typ.UInt64> [32+umagic(32,c).s-2])))
(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 8 && config.useAvg ->
(Trunc64to32
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Avg64u
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Lsh64x64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [32]))
(Mul64 <typ.UInt64>
(Const64 <typ.UInt32> [int64(umagic(32,c).m)])
(ZeroExt32to64 x)))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [32+umagic(32,c).s-1])))
// For 64-bit divides on 64-bit machines
// (64-bit divides on 32-bit machines are lowered to a runtime call by the walk pass.)
(Div64u x (Const64 [c])) && umagicOK(64, c) && config.RegSize == 8 && umagic(64,c).m&1 == 0 && config.useHmul ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Hmul64u <typ.UInt64>
(Const64 <typ.UInt64> [int64(1<<63+umagic(64,c).m/2)])
x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [umagic(64,c).s-1]))
(Div64u x (Const64 [c])) && umagicOK(64, c) && config.RegSize == 8 && c&1 == 0 && config.useHmul ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Hmul64u <typ.UInt64>
(Const64 <typ.UInt64> [int64(1<<63+(umagic(64,c).m+1)/2)])
(Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])))
(Const64 <typ.UInt64> [umagic(64,c).s-2]))
(Div64u x (Const64 [c])) && umagicOK(64, c) && config.RegSize == 8 && config.useAvg && config.useHmul ->
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Avg64u
x
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Hmul64u <typ.UInt64>
(Const64 <typ.UInt64> [int64(umagic(64,c).m)])
x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [umagic(64,c).s-1]))
// Signed divide by a negative constant. Rewrite to divide by a positive constant.
(Div8 <t> n (Const8 [c])) && c < 0 && c != -1<<7 -> (Neg8 (Div8 <t> n (Const8 <t> [-c])))
(Div16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 -> (Neg16 (Div16 <t> n (Const16 <t> [-c])))
(Div32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 -> (Neg32 (Div32 <t> n (Const32 <t> [-c])))
(Div64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 -> (Neg64 (Div64 <t> n (Const64 <t> [-c])))
// Dividing by the most-negative number. Result is always 0 except
// if the input is also the most-negative number.
// We can detect that using the sign bit of x & -x.
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Div8 <t> x (Const8 [-1<<7 ])) -> (Rsh8Ux64 (And8 <t> x (Neg8 <t> x)) (Const64 <typ.UInt64> [7 ]))
(Div16 <t> x (Const16 [-1<<15])) -> (Rsh16Ux64 (And16 <t> x (Neg16 <t> x)) (Const64 <typ.UInt64> [15]))
(Div32 <t> x (Const32 [-1<<31])) -> (Rsh32Ux64 (And32 <t> x (Neg32 <t> x)) (Const64 <typ.UInt64> [31]))
(Div64 <t> x (Const64 [-1<<63])) -> (Rsh64Ux64 (And64 <t> x (Neg64 <t> x)) (Const64 <typ.UInt64> [63]))
// Signed divide by power of 2.
// n / c = n >> log(c) if n >= 0
// = (n+c-1) >> log(c) if n < 0
// We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
(Div8 <t> n (Const8 [c])) && isPowerOfTwo(c) ->
(Rsh8x64
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [ 8-log2(c)])))
(Const64 <typ.UInt64> [log2(c)]))
(Div16 <t> n (Const16 [c])) && isPowerOfTwo(c) ->
(Rsh16x64
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [16-log2(c)])))
(Const64 <typ.UInt64> [log2(c)]))
(Div32 <t> n (Const32 [c])) && isPowerOfTwo(c) ->
(Rsh32x64
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [32-log2(c)])))
(Const64 <typ.UInt64> [log2(c)]))
(Div64 <t> n (Const64 [c])) && isPowerOfTwo(c) ->
(Rsh64x64
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [64-log2(c)])))
(Const64 <typ.UInt64> [log2(c)]))
// Signed divide, not a power of 2. Strength reduce to a multiply.
(Div8 <t> x (Const8 [c])) && smagicOK(8,c) ->
(Sub8 <t>
(Rsh32x64 <t>
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Mul32 <typ.UInt32>
(Const32 <typ.UInt32> [int64(smagic(8,c).m)])
(SignExt8to32 x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [8+smagic(8,c).s]))
(Rsh32x64 <t>
(SignExt8to32 x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
(Div16 <t> x (Const16 [c])) && smagicOK(16,c) ->
(Sub16 <t>
(Rsh32x64 <t>
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Mul32 <typ.UInt32>
(Const32 <typ.UInt32> [int64(smagic(16,c).m)])
(SignExt16to32 x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [16+smagic(16,c).s]))
(Rsh32x64 <t>
(SignExt16to32 x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
(Div32 <t> x (Const32 [c])) && smagicOK(32,c) && config.RegSize == 8 ->
(Sub32 <t>
(Rsh64x64 <t>
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Mul64 <typ.UInt64>
(Const64 <typ.UInt64> [int64(smagic(32,c).m)])
(SignExt32to64 x))
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [32+smagic(32,c).s]))
(Rsh64x64 <t>
(SignExt32to64 x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [63])))
(Div32 <t> x (Const32 [c])) && smagicOK(32,c) && config.RegSize == 4 && smagic(32,c).m&1 == 0 && config.useHmul ->
(Sub32 <t>
(Rsh32x64 <t>
(Hmul32 <t>
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const32 <typ.UInt32> [int64(int32(smagic(32,c).m/2))])
x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [smagic(32,c).s-1]))
(Rsh32x64 <t>
x
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
(Div32 <t> x (Const32 [c])) && smagicOK(32,c) && config.RegSize == 4 && smagic(32,c).m&1 != 0 && config.useHmul ->
(Sub32 <t>
(Rsh32x64 <t>
(Add32 <t>
(Hmul32 <t>
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const32 <typ.UInt32> [int64(int32(smagic(32,c).m))])
x)
x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [smagic(32,c).s]))
(Rsh32x64 <t>
x
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
(Div64 <t> x (Const64 [c])) && smagicOK(64,c) && smagic(64,c).m&1 == 0 && config.useHmul ->
(Sub64 <t>
(Rsh64x64 <t>
(Hmul64 <t>
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [int64(smagic(64,c).m/2)])
x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [smagic(64,c).s-1]))
(Rsh64x64 <t>
x
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [63])))
(Div64 <t> x (Const64 [c])) && smagicOK(64,c) && smagic(64,c).m&1 != 0 && config.useHmul ->
(Sub64 <t>
(Rsh64x64 <t>
(Add64 <t>
(Hmul64 <t>
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [int64(smagic(64,c).m)])
x)
x)
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [smagic(64,c).s]))
(Rsh64x64 <t>
x
cmd/compile: change ssa.Type into *types.Type When package ssa was created, Type was in package gc. To avoid circular dependencies, we used an interface (ssa.Type) to represent type information in SSA. In the Go 1.9 cycle, gri extricated the Type type from package gc. As a result, we can now use it in package ssa. Now, instead of package types depending on package ssa, it is the other way. This is a more sensible dependency tree, and helps compiler performance a bit. Though this is a big CL, most of the changes are mechanical and uninteresting. Interesting bits: * Add new singleton globals to package types for the special SSA types Memory, Void, Invalid, Flags, and Int128. * Add two new Types, TSSA for the special types, and TTUPLE, for SSA tuple types. ssa.MakeTuple is now types.NewTuple. * Move type comparison result constants CMPlt, CMPeq, and CMPgt to package types. * We had picked the name "types" in our rules for the handy list of types provided by ssa.Config. That conflicted with the types package name, so change it to "typ". * Update the type comparison routine to handle tuples and special types inline. * Teach gc/fmt.go how to print special types. * We can now eliminate ElemTypes in favor of just Elem, and probably also some other duplicated Type methods designed to return ssa.Type instead of *types.Type. * The ssa tests were using their own dummy types, and they were not particularly careful about types in general. Of necessity, this CL switches them to use *types.Type; it does not make them more type-accurate. Unfortunately, using types.Type means initializing a bit of the types universe. This is prime for refactoring and improvement. This shrinks ssa.Value; it now fits in a smaller size class on 64 bit systems. This doesn't have a giant impact, though, since most Values are preallocated in a chunk. name old alloc/op new alloc/op delta Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8) Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10) GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10) Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10) GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9) Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8) Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10) XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10) [Geo mean] 40.5MB 40.3MB -0.68% name old allocs/op new allocs/op delta Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9) Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10) Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10) GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9) Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8) Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10) XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10) [Geo mean] 428k 428k -0.01% Removing all the interface calls helps non-trivially with CPU, though. name old time/op new time/op delta Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96) Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96) GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96) Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99) GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97) Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99) Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94) XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95) [Geo mean] 178ms 173ms -2.65% name old user-time/op new user-time/op delta Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99) Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95) GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99) Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96) GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100) Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92) Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100) XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97) [Geo mean] 220ms 213ms -2.76% Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1 Reviewed-on: https://go-review.googlesource.com/42145 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [63])))
// Unsigned mod by power of 2 constant.
(Mod8u <t> n (Const8 [c])) && isPowerOfTwo(c&0xff) -> (And8 n (Const8 <t> [(c&0xff)-1]))
(Mod16u <t> n (Const16 [c])) && isPowerOfTwo(c&0xffff) -> (And16 n (Const16 <t> [(c&0xffff)-1]))
(Mod32u <t> n (Const32 [c])) && isPowerOfTwo(c&0xffffffff) -> (And32 n (Const32 <t> [(c&0xffffffff)-1]))
(Mod64u <t> n (Const64 [c])) && isPowerOfTwo(c) -> (And64 n (Const64 <t> [c-1]))
(Mod64u <t> n (Const64 [-1<<63])) -> (And64 n (Const64 <t> [1<<63-1]))
// Signed non-negative mod by power of 2 constant.
(Mod8 <t> n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo(c&0xff) -> (And8 n (Const8 <t> [(c&0xff)-1]))
(Mod16 <t> n (Const16 [c])) && isNonNegative(n) && isPowerOfTwo(c&0xffff) -> (And16 n (Const16 <t> [(c&0xffff)-1]))
(Mod32 <t> n (Const32 [c])) && isNonNegative(n) && isPowerOfTwo(c&0xffffffff) -> (And32 n (Const32 <t> [(c&0xffffffff)-1]))
(Mod64 <t> n (Const64 [c])) && isNonNegative(n) && isPowerOfTwo(c) -> (And64 n (Const64 <t> [c-1]))
(Mod64 n (Const64 [-1<<63])) && isNonNegative(n) -> n
// Signed mod by negative constant.
(Mod8 <t> n (Const8 [c])) && c < 0 && c != -1<<7 -> (Mod8 <t> n (Const8 <t> [-c]))
(Mod16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 -> (Mod16 <t> n (Const16 <t> [-c]))
(Mod32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 -> (Mod32 <t> n (Const32 <t> [-c]))
(Mod64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 -> (Mod64 <t> n (Const64 <t> [-c]))
// All other mods by constants, do A%B = A-(A/B*B).
// This implements % with two * and a bunch of ancillary ops.
// One of the * is free if the user's code also computes A/B.
(Mod8 <t> x (Const8 [c])) && x.Op != OpConst8 && (c > 0 || c == -1<<7)
-> (Sub8 x (Mul8 <t> (Div8 <t> x (Const8 <t> [c])) (Const8 <t> [c])))
(Mod16 <t> x (Const16 [c])) && x.Op != OpConst16 && (c > 0 || c == -1<<15)
-> (Sub16 x (Mul16 <t> (Div16 <t> x (Const16 <t> [c])) (Const16 <t> [c])))
(Mod32 <t> x (Const32 [c])) && x.Op != OpConst32 && (c > 0 || c == -1<<31)
-> (Sub32 x (Mul32 <t> (Div32 <t> x (Const32 <t> [c])) (Const32 <t> [c])))
(Mod64 <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63)
-> (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
(Mod8u <t> x (Const8 [c])) && x.Op != OpConst8 && c > 0 && umagicOK(8 ,c)
-> (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c])))
(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK(16,c)
-> (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK(32,c)
-> (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK(64,c)
-> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
(Eq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 -> (Eq(8|16|32|64) x y)
(Neq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 -> (Neq(8|16|32|64) x y)
cmd/compile/ssa: more aggressive constant folding Add rewrite rules that canonicalize the location of constants in expressions, and fold conststants that appear in operations that can be trivially reassociated. After this change, the compiler constant-folds expressions like "4 + x - 1" and "4 & x & 1" Benchmarks affected on darwin/amd64: name old time/op new time/op delta FmtFprintfInt-8 82.1ns ± 1% 81.7ns ± 1% -0.46% (p=0.023 n=8+9) FmtFprintfIntInt-8 122ns ± 2% 120ns ± 2% -1.48% (p=0.047 n=10+10) FmtManyArgs-8 493ns ± 0% 486ns ± 1% -1.37% (p=0.000 n=8+10) Gzip-8 230ms ± 0% 229ms ± 1% -0.46% (p=0.001 n=10+9) HTTPClientServer-8 74.5µs ± 1% 73.7µs ± 1% -1.11% (p=0.000 n=10+10) JSONDecode-8 51.7ms ± 0% 51.9ms ± 1% +0.42% (p=0.017 n=10+9) RegexpMatchEasy0_32-8 82.6ns ± 1% 81.7ns ± 0% -1.02% (p=0.000 n=9+8) RegexpMatchMedium_32-8 121ns ± 1% 120ns ± 1% -1.48% (p=0.001 n=10+10) Revcomp-8 426ms ± 1% 400ms ± 1% -6.16% (p=0.000 n=10+10) TimeFormat-8 330ns ± 1% 327ns ± 0% -0.82% (p=0.000 n=10+10) name old speed new speed delta Gzip-8 84.4MB/s ± 0% 84.8MB/s ± 1% +0.47% (p=0.001 n=10+9) JSONDecode-8 37.6MB/s ± 0% 37.4MB/s ± 1% -0.42% (p=0.016 n=10+9) RegexpMatchEasy0_32-8 387MB/s ± 1% 392MB/s ± 0% +1.06% (p=0.000 n=9+8) RegexpMatchMedium_32-8 8.21MB/s ± 1% 8.34MB/s ± 1% +1.58% (p=0.000 n=10+9) Revcomp-8 597MB/s ± 1% 636MB/s ± 1% +6.57% (p=0.000 n=10+10) Change-Id: Ie37ff91605b76a984a8400dfd1e34f50bf61c864 Reviewed-on: https://go-review.googlesource.com/37290 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-20 08:43:54 -08:00
// Reassociate expressions involving
// constants such that constants come first,
// exposing obvious constant-folding opportunities.
cmd/compile: automatically handle commuting ops in rewrite rules Note that this is a redo of an undo of the original buggy CL 38666. We have lots of rewrite rules that vary only in the fact that we have 2 versions for the 2 different orderings of various commuting ops. For example: (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x) (ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x) It can get unwieldly quickly, especially when there is more than one commuting op in a rule. Our existing "fix" for this problem is to have rules that canonicalize the operations first. For example: (Eq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Eq64 (Const64 <t> [c]) x) Subsequent rules can then assume if there is a constant arg to Eq64, it will be the first one. This fix kinda works, but it is fragile and only works when we remember to include the required extra rules. The fundamental problem is that the rule matcher doesn't know anything about commuting ops. This CL fixes that fact. We already have information about which ops commute. (The register allocator takes advantage of commutivity.) The rule generator now automatically generates multiple rules for a single source rule when there are commutative ops in the rule. We can now drop all of our almost-duplicate source-level rules and the canonicalization rules. I have some CLs in progress that will be a lot less verbose when the rule generator handles commutivity for me. I had to reorganize the load-combining rules a bit. The 8-way OR rules generated 128 different reorderings, which was causing the generator to put too much code in the rewrite*.go files (the big ones were going from 25K lines to 132K lines). Instead I reorganized the rules to combine pairs of loads at a time. The generated rule files are now actually a bit (5%) smaller. Make.bash times are ~unchanged. Compiler benchmarks are not observably different. Probably because we don't spend much compiler time in rule matching anyway. I've also done a pass over all of our ops adding commutative markings for ops which hadn't had them previously. Fixes #18292 Change-Id: Ic1c0e43fbf579539f459971625f69690c9ab8805 Reviewed-on: https://go-review.googlesource.com/38801 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2017-03-30 03:30:22 +00:00
// Reassociate (op (op y C) x) to (op C (op x y)) or similar, where C
cmd/compile/ssa: more aggressive constant folding Add rewrite rules that canonicalize the location of constants in expressions, and fold conststants that appear in operations that can be trivially reassociated. After this change, the compiler constant-folds expressions like "4 + x - 1" and "4 & x & 1" Benchmarks affected on darwin/amd64: name old time/op new time/op delta FmtFprintfInt-8 82.1ns ± 1% 81.7ns ± 1% -0.46% (p=0.023 n=8+9) FmtFprintfIntInt-8 122ns ± 2% 120ns ± 2% -1.48% (p=0.047 n=10+10) FmtManyArgs-8 493ns ± 0% 486ns ± 1% -1.37% (p=0.000 n=8+10) Gzip-8 230ms ± 0% 229ms ± 1% -0.46% (p=0.001 n=10+9) HTTPClientServer-8 74.5µs ± 1% 73.7µs ± 1% -1.11% (p=0.000 n=10+10) JSONDecode-8 51.7ms ± 0% 51.9ms ± 1% +0.42% (p=0.017 n=10+9) RegexpMatchEasy0_32-8 82.6ns ± 1% 81.7ns ± 0% -1.02% (p=0.000 n=9+8) RegexpMatchMedium_32-8 121ns ± 1% 120ns ± 1% -1.48% (p=0.001 n=10+10) Revcomp-8 426ms ± 1% 400ms ± 1% -6.16% (p=0.000 n=10+10) TimeFormat-8 330ns ± 1% 327ns ± 0% -0.82% (p=0.000 n=10+10) name old speed new speed delta Gzip-8 84.4MB/s ± 0% 84.8MB/s ± 1% +0.47% (p=0.001 n=10+9) JSONDecode-8 37.6MB/s ± 0% 37.4MB/s ± 1% -0.42% (p=0.016 n=10+9) RegexpMatchEasy0_32-8 387MB/s ± 1% 392MB/s ± 0% +1.06% (p=0.000 n=9+8) RegexpMatchMedium_32-8 8.21MB/s ± 1% 8.34MB/s ± 1% +1.58% (p=0.000 n=10+9) Revcomp-8 597MB/s ± 1% 636MB/s ± 1% +6.57% (p=0.000 n=10+10) Change-Id: Ie37ff91605b76a984a8400dfd1e34f50bf61c864 Reviewed-on: https://go-review.googlesource.com/37290 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-20 08:43:54 -08:00
// is constant, which pushes constants to the outside
// of the expression. At that point, any constant-folding
// opportunities should be obvious.
// x + (C + z) -> C + (x + z)
(Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Add64 <t> z x))
(Add32 (Add32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Add32 i (Add32 <t> z x))
(Add16 (Add16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Add16 i (Add16 <t> z x))
(Add8 (Add8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Add8 i (Add8 <t> z x))
// x + (C - z) -> C + (x - z)
(Add64 (Sub64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Sub64 <t> x z))
(Add32 (Sub32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Add32 i (Sub32 <t> x z))
(Add16 (Sub16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Add16 i (Sub16 <t> x z))
(Add8 (Sub8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Add8 i (Sub8 <t> x z))
(Add64 x (Sub64 i:(Const64 <t>) z)) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Sub64 <t> x z))
(Add32 x (Sub32 i:(Const32 <t>) z)) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Add32 i (Sub32 <t> x z))
(Add16 x (Sub16 i:(Const16 <t>) z)) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Add16 i (Sub16 <t> x z))
(Add8 x (Sub8 i:(Const8 <t>) z)) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Add8 i (Sub8 <t> x z))
// x + (z - C) -> (x + z) - C
(Add64 (Sub64 z i:(Const64 <t>)) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Sub64 (Add64 <t> x z) i)
(Add32 (Sub32 z i:(Const32 <t>)) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Sub32 (Add32 <t> x z) i)
(Add16 (Sub16 z i:(Const16 <t>)) x) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Sub16 (Add16 <t> x z) i)
(Add8 (Sub8 z i:(Const8 <t>)) x) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Sub8 (Add8 <t> x z) i)
(Add64 x (Sub64 z i:(Const64 <t>))) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Sub64 (Add64 <t> x z) i)
(Add32 x (Sub32 z i:(Const32 <t>))) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Sub32 (Add32 <t> x z) i)
(Add16 x (Sub16 z i:(Const16 <t>))) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Sub16 (Add16 <t> x z) i)
(Add8 x (Sub8 z i:(Const8 <t>))) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Sub8 (Add8 <t> x z) i)
// x - (C - z) -> x + (z - C) -> (x + z) - C
(Sub64 x (Sub64 i:(Const64 <t>) z)) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Sub64 (Add64 <t> x z) i)
(Sub32 x (Sub32 i:(Const32 <t>) z)) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Sub32 (Add32 <t> x z) i)
(Sub16 x (Sub16 i:(Const16 <t>) z)) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Sub16 (Add16 <t> x z) i)
(Sub8 x (Sub8 i:(Const8 <t>) z)) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Sub8 (Add8 <t> x z) i)
// x - (z - C) -> x + (C - z) -> (x - z) + C
(Sub64 x (Sub64 z i:(Const64 <t>))) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Sub64 <t> x z))
(Sub32 x (Sub32 z i:(Const32 <t>))) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Add32 i (Sub32 <t> x z))
(Sub16 x (Sub16 z i:(Const16 <t>))) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Add16 i (Sub16 <t> x z))
(Sub8 x (Sub8 z i:(Const8 <t>))) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Add8 i (Sub8 <t> x z))
// x & (C & z) -> C & (x & z)
(And64 (And64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (And64 i (And64 <t> z x))
(And32 (And32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (And32 i (And32 <t> z x))
(And16 (And16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) -> (And16 i (And16 <t> z x))
(And8 (And8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) -> (And8 i (And8 <t> z x))
// x | (C | z) -> C | (x | z)
(Or64 (Or64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Or64 i (Or64 <t> z x))
(Or32 (Or32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Or32 i (Or32 <t> z x))
(Or16 (Or16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Or16 i (Or16 <t> z x))
(Or8 (Or8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Or8 i (Or8 <t> z x))
// x ^ (C ^ z) -> C ^ (x ^ z)
(Xor64 (Xor64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Xor64 i (Xor64 <t> z x))
(Xor32 (Xor32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Xor32 i (Xor32 <t> z x))
(Xor16 (Xor16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) -> (Xor16 i (Xor16 <t> z x))
(Xor8 (Xor8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) -> (Xor8 i (Xor8 <t> z x))
// C + (D + x) -> (C + D) + x
(Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) -> (Add64 (Const64 <t> [c+d]) x)
(Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) -> (Add32 (Const32 <t> [int64(int32(c+d))]) x)
(Add16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Add16 (Const16 <t> [int64(int16(c+d))]) x)
(Add8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) -> (Add8 (Const8 <t> [int64(int8(c+d))]) x)
// C + (D - x) -> (C + D) - x
(Add64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x)) -> (Sub64 (Const64 <t> [c+d]) x)
(Add32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x)) -> (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
(Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) -> (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
(Add8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x)) -> (Sub8 (Const8 <t> [int64(int8(c+d))]) x)
// C + (x - D) -> (C - D) + x
(Add64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d]))) -> (Add64 (Const64 <t> [c-d]) x)
(Add32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d]))) -> (Add32 (Const32 <t> [int64(int32(c-d))]) x)
(Add16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d]))) -> (Add16 (Const16 <t> [int64(int16(c-d))]) x)
(Add8 (Const8 <t> [c]) (Sub8 x (Const8 <t> [d]))) -> (Add8 (Const8 <t> [int64(int8(c-d))]) x)
// C - (x - D) -> (C + D) - x
(Sub64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d]))) -> (Sub64 (Const64 <t> [c+d]) x)
(Sub32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d]))) -> (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
(Sub16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d]))) -> (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
(Sub8 (Const8 <t> [c]) (Sub8 x (Const8 <t> [d]))) -> (Sub8 (Const8 <t> [int64(int8(c+d))]) x)
// C - (D - x) -> (C - D) + x
(Sub64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x)) -> (Add64 (Const64 <t> [c-d]) x)
(Sub32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x)) -> (Add32 (Const32 <t> [int64(int32(c-d))]) x)
(Sub16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) -> (Add16 (Const16 <t> [int64(int16(c-d))]) x)
(Sub8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x)) -> (Add8 (Const8 <t> [int64(int8(c-d))]) x)
// C & (D & x) -> (C & D) & x
(And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x)) -> (And64 (Const64 <t> [c&d]) x)
(And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x)) -> (And32 (Const32 <t> [int64(int32(c&d))]) x)
(And16 (Const16 <t> [c]) (And16 (Const16 <t> [d]) x)) -> (And16 (Const16 <t> [int64(int16(c&d))]) x)
(And8 (Const8 <t> [c]) (And8 (Const8 <t> [d]) x)) -> (And8 (Const8 <t> [int64(int8(c&d))]) x)
// C | (D | x) -> (C | D) | x
(Or64 (Const64 <t> [c]) (Or64 (Const64 <t> [d]) x)) -> (Or64 (Const64 <t> [c|d]) x)
(Or32 (Const32 <t> [c]) (Or32 (Const32 <t> [d]) x)) -> (Or32 (Const32 <t> [int64(int32(c|d))]) x)
(Or16 (Const16 <t> [c]) (Or16 (Const16 <t> [d]) x)) -> (Or16 (Const16 <t> [int64(int16(c|d))]) x)
(Or8 (Const8 <t> [c]) (Or8 (Const8 <t> [d]) x)) -> (Or8 (Const8 <t> [int64(int8(c|d))]) x)
// C ^ (D ^ x) -> (C ^ D) ^ x
(Xor64 (Const64 <t> [c]) (Xor64 (Const64 <t> [d]) x)) -> (Xor64 (Const64 <t> [c^d]) x)
(Xor32 (Const32 <t> [c]) (Xor32 (Const32 <t> [d]) x)) -> (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
(Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x)) -> (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
(Xor8 (Const8 <t> [c]) (Xor8 (Const8 <t> [d]) x)) -> (Xor8 (Const8 <t> [int64(int8(c^d))]) x)
// C * (D * x) = (C * D) * x
(Mul64 (Const64 <t> [c]) (Mul64 (Const64 <t> [d]) x)) -> (Mul64 (Const64 <t> [c*d]) x)
(Mul32 (Const32 <t> [c]) (Mul32 (Const32 <t> [d]) x)) -> (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
(Mul16 (Const16 <t> [c]) (Mul16 (Const16 <t> [d]) x)) -> (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
(Mul8 (Const8 <t> [c]) (Mul8 (Const8 <t> [d]) x)) -> (Mul8 (Const8 <t> [int64(int8(c*d))]) x)
// floating point optimizations
(Mul(32|64)F x (Const(32|64)F [auxFrom64F(1)])) -> x
(Mul32F x (Const32F [auxFrom32F(-1)])) -> (Neg32F x)
(Mul64F x (Const64F [auxFrom64F(-1)])) -> (Neg64F x)
(Mul32F x (Const32F [auxFrom32F(2)])) -> (Add32F x x)
(Mul64F x (Const64F [auxFrom64F(2)])) -> (Add64F x x)
(Div32F x (Const32F <t> [c])) && reciprocalExact32(auxTo32F(c)) -> (Mul32F x (Const32F <t> [auxFrom32F(1/auxTo32F(c))]))
(Div64F x (Const64F <t> [c])) && reciprocalExact64(auxTo64F(c)) -> (Mul64F x (Const64F <t> [auxFrom64F(1/auxTo64F(c))]))
(Sqrt (Const64F [c])) -> (Const64F [auxFrom64F(math.Sqrt(auxTo64F(c)))])
// recognize runtime.newobject and don't Zero/Nilcheck it
(Zero (Load (OffPtr [c] (SP)) mem) mem)
&& mem.Op == OpStaticCall
&& isSameSym(mem.Aux, "runtime.newobject")
&& c == config.ctxt.FixedFrameSize() + config.RegSize // offset of return value
-> mem
(Store (Load (OffPtr [c] (SP)) mem) x mem)
&& isConstZero(x)
&& mem.Op == OpStaticCall
&& isSameSym(mem.Aux, "runtime.newobject")
&& c == config.ctxt.FixedFrameSize() + config.RegSize // offset of return value
-> mem
(Store (OffPtr (Load (OffPtr [c] (SP)) mem)) x mem)
&& isConstZero(x)
&& mem.Op == OpStaticCall
&& isSameSym(mem.Aux, "runtime.newobject")
&& c == config.ctxt.FixedFrameSize() + config.RegSize // offset of return value
-> mem
// nil checks just need to rewrite to something useless.
// they will be deadcode eliminated soon afterwards.
(NilCheck (Load (OffPtr [c] (SP)) (StaticCall {sym} _)) _)
&& isSameSym(sym, "runtime.newobject")
&& c == config.ctxt.FixedFrameSize() + config.RegSize // offset of return value
&& warnRule(fe.Debug_checknil(), v, "removed nil check")
-> (Invalid)
(NilCheck (OffPtr (Load (OffPtr [c] (SP)) (StaticCall {sym} _))) _)
&& isSameSym(sym, "runtime.newobject")
&& c == config.ctxt.FixedFrameSize() + config.RegSize // offset of return value
&& warnRule(fe.Debug_checknil(), v, "removed nil check")
-> (Invalid)
// Evaluate constant address comparisons.
(EqPtr x x) -> (ConstBool [1])
(NeqPtr x x) -> (ConstBool [0])
(EqPtr (Addr {a} _) (Addr {b} _)) -> (ConstBool [b2i(a == b)])
(NeqPtr (Addr {a} _) (Addr {b} _)) -> (ConstBool [b2i(a != b)])
(EqPtr (LocalAddr {a} _ _) (LocalAddr {b} _ _)) -> (ConstBool [b2i(a == b)])
(NeqPtr (LocalAddr {a} _ _) (LocalAddr {b} _ _)) -> (ConstBool [b2i(a != b)])
(EqPtr (OffPtr [o1] p1) p2) && isSamePtr(p1, p2) -> (ConstBool [b2i(o1 == 0)])
(NeqPtr (OffPtr [o1] p1) p2) && isSamePtr(p1, p2) -> (ConstBool [b2i(o1 != 0)])
(EqPtr (OffPtr [o1] p1) (OffPtr [o2] p2)) && isSamePtr(p1, p2) -> (ConstBool [b2i(o1 == o2)])
(NeqPtr (OffPtr [o1] p1) (OffPtr [o2] p2)) && isSamePtr(p1, p2) -> (ConstBool [b2i(o1 != o2)])
(EqPtr (Const(32|64) [c]) (Const(32|64) [d])) -> (ConstBool [b2i(c == d)])
(NeqPtr (Const(32|64) [c]) (Const(32|64) [d])) -> (ConstBool [b2i(c != d)])
(EqPtr (LocalAddr _ _) (Addr _)) -> (ConstBool [0])
(NeqPtr (LocalAddr _ _) (Addr _)) -> (ConstBool [1])
(EqPtr (Addr _) (LocalAddr _ _)) -> (ConstBool [0])
(NeqPtr (Addr _) (LocalAddr _ _)) -> (ConstBool [1])
// Simplify address comparisons.
(EqPtr (AddPtr p1 o1) p2) && isSamePtr(p1, p2) -> (Not (IsNonNil o1))
(NeqPtr (AddPtr p1 o1) p2) && isSamePtr(p1, p2) -> (IsNonNil o1)
(EqPtr (Const(32|64) [0]) p) -> (Not (IsNonNil p))
(NeqPtr (Const(32|64) [0]) p) -> (IsNonNil p)
(EqPtr (ConstNil) p) -> (Not (IsNonNil p))
(NeqPtr (ConstNil) p) -> (IsNonNil p)
// Evaluate constant user nil checks.
(IsNonNil (ConstNil)) -> (ConstBool [0])
(IsNonNil (Const(32|64) [c])) -> (ConstBool [b2i(c != 0)])
(IsNonNil (Addr _)) -> (ConstBool [1])
(IsNonNil (LocalAddr _ _)) -> (ConstBool [1])
// Inline small or disjoint runtime.memmove calls with constant length.
(StaticCall {sym} s1:(Store _ (Const(64|32) [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
&& isSameSym(sym,"runtime.memmove")
&& t.(*types.Type).IsPtr() // avoids TUINTPTR, see issue 30061
&& s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
&& isInlinableMemmove(dst,src,sz,config)
&& clobber(s1) && clobber(s2) && clobber(s3)
-> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
// De-virtualize interface calls into static calls.
// Note that (ITab (IMake)) doesn't get
// rewritten until after the first opt pass,
// so this rule should trigger reliably.
(InterCall [argsize] (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) mem) && devirt(v, itab, off) != nil ->
(StaticCall [argsize] {devirt(v, itab, off)} mem)
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
// Move and Zero optimizations.
// Move source and destination may overlap.
// Convert Moves into Zeros when the source is known to be zeros.
(Move {t} [n] dst1 src mem:(Zero {t} [n] dst2 _)) && isSamePtr(src, dst2)
-> (Zero {t} [n] dst1 mem)
(Move {t} [n] dst1 src mem:(VarDef (Zero {t} [n] dst0 _))) && isSamePtr(src, dst0)
-> (Zero {t} [n] dst1 mem)
// Don't Store to variables that are about to be overwritten by Move/Zero.
(Zero {t1} [n] p1 store:(Store {t2} (OffPtr [o2] p2) _ mem))
&& isSamePtr(p1, p2) && store.Uses == 1
&& n >= o2 + sizeof(t2)
&& clobber(store)
-> (Zero {t1} [n] p1 mem)
(Move {t1} [n] dst1 src1 store:(Store {t2} op:(OffPtr [o2] dst2) _ mem))
&& isSamePtr(dst1, dst2) && store.Uses == 1
&& n >= o2 + sizeof(t2)
&& disjoint(src1, n, op, sizeof(t2))
&& clobber(store)
-> (Move {t1} [n] dst1 src1 mem)
// Don't Move to variables that are immediately completely overwritten.
(Zero {t} [n] dst1 move:(Move {t} [n] dst2 _ mem))
&& move.Uses == 1
&& isSamePtr(dst1, dst2)
&& clobber(move)
-> (Zero {t} [n] dst1 mem)
(Move {t} [n] dst1 src1 move:(Move {t} [n] dst2 _ mem))
&& move.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
&& clobber(move)
-> (Move {t} [n] dst1 src1 mem)
(Zero {t} [n] dst1 vardef:(VarDef {x} move:(Move {t} [n] dst2 _ mem)))
&& move.Uses == 1 && vardef.Uses == 1
&& isSamePtr(dst1, dst2)
&& clobber(move) && clobber(vardef)
-> (Zero {t} [n] dst1 (VarDef {x} mem))
(Move {t} [n] dst1 src1 vardef:(VarDef {x} move:(Move {t} [n] dst2 _ mem)))
&& move.Uses == 1 && vardef.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
&& clobber(move) && clobber(vardef)
-> (Move {t} [n] dst1 src1 (VarDef {x} mem))
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [0] p2) d2
m3:(Move [n] p3 _ mem)))
&& m2.Uses == 1 && m3.Uses == 1
&& o1 == sizeof(t2)
&& n == sizeof(t2) + sizeof(t1)
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
&& clobber(m2) && clobber(m3)
-> (Store {t1} op1 d1 (Store {t2} op2 d2 mem))
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [0] p3) d3
m4:(Move [n] p4 _ mem))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1
&& o2 == sizeof(t3)
&& o1-o2 == sizeof(t2)
&& n == sizeof(t3) + sizeof(t2) + sizeof(t1)
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
&& clobber(m2) && clobber(m3) && clobber(m4)
-> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 mem)))
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [o3] p3) d3
m4:(Store {t4} op4:(OffPtr [0] p4) d4
m5:(Move [n] p5 _ mem)))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && m5.Uses == 1
&& o3 == sizeof(t4)
&& o2-o3 == sizeof(t3)
&& o1-o2 == sizeof(t2)
&& n == sizeof(t4) + sizeof(t3) + sizeof(t2) + sizeof(t1)
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
&& clobber(m2) && clobber(m3) && clobber(m4) && clobber(m5)
-> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 (Store {t4} op4 d4 mem))))
// Don't Zero variables that are immediately completely overwritten
// before being accessed.
(Move {t} [n] dst1 src1 zero:(Zero {t} [n] dst2 mem))
&& zero.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
&& clobber(zero)
-> (Move {t} [n] dst1 src1 mem)
(Move {t} [n] dst1 src1 vardef:(VarDef {x} zero:(Zero {t} [n] dst2 mem)))
&& zero.Uses == 1 && vardef.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
&& clobber(zero) && clobber(vardef)
-> (Move {t} [n] dst1 src1 (VarDef {x} mem))
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [0] p2) d2
m3:(Zero [n] p3 mem)))
&& m2.Uses == 1 && m3.Uses == 1
&& o1 == sizeof(t2)
&& n == sizeof(t2) + sizeof(t1)
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
&& clobber(m2) && clobber(m3)
-> (Store {t1} op1 d1 (Store {t2} op2 d2 mem))
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [0] p3) d3
m4:(Zero [n] p4 mem))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1
&& o2 == sizeof(t3)
&& o1-o2 == sizeof(t2)
&& n == sizeof(t3) + sizeof(t2) + sizeof(t1)
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
&& clobber(m2) && clobber(m3) && clobber(m4)
-> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 mem)))
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [o3] p3) d3
m4:(Store {t4} op4:(OffPtr [0] p4) d4
m5:(Zero [n] p5 mem)))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && m5.Uses == 1
&& o3 == sizeof(t4)
&& o2-o3 == sizeof(t3)
&& o1-o2 == sizeof(t2)
&& n == sizeof(t4) + sizeof(t3) + sizeof(t2) + sizeof(t1)
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
&& clobber(m2) && clobber(m3) && clobber(m4) && clobber(m5)
-> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 (Store {t4} op4 d4 mem))))
// Don't Move from memory if the values are likely to already be
// in registers.
(Move {t1} [n] dst p1
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [0] p3) d2 _)))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& o2 == sizeof(t3)
&& n == sizeof(t2) + sizeof(t3)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [0] dst) d2 mem))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [0] p4) d3 _))))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& o3 == sizeof(t4)
&& o2-o3 == sizeof(t3)
&& n == sizeof(t2) + sizeof(t3) + sizeof(t4)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [0] dst) d3 mem)))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [o4] p4) d3
(Store {t5} op5:(OffPtr <tt5> [0] p5) d4 _)))))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& alignof(t5) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
&& o4 == sizeof(t5)
&& o3-o4 == sizeof(t4)
&& o2-o3 == sizeof(t3)
&& n == sizeof(t2) + sizeof(t3) + sizeof(t4) + sizeof(t5)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [0] dst) d4 mem))))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
// Same thing but with VarDef in the middle.
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [0] p3) d2 _))))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& o2 == sizeof(t3)
&& n == sizeof(t2) + sizeof(t3)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [0] dst) d2 mem))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [0] p4) d3 _)))))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& o3 == sizeof(t4)
&& o2-o3 == sizeof(t3)
&& n == sizeof(t2) + sizeof(t3) + sizeof(t4)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [0] dst) d3 mem)))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [o4] p4) d3
(Store {t5} op5:(OffPtr <tt5> [0] p5) d4 _))))))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& alignof(t5) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
&& o4 == sizeof(t5)
&& o3-o4 == sizeof(t4)
&& o2-o3 == sizeof(t3)
&& n == sizeof(t2) + sizeof(t3) + sizeof(t4) + sizeof(t5)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [0] dst) d4 mem))))
cmd/compile: add some generic composite type optimizations Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% ~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-11 22:47:24 +01:00
// Prefer to Zero and Store than to Move.
(Move {t1} [n] dst p1
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Zero {t3} [n] p3 _)))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& registerizable(b, t2)
&& n >= o2 + sizeof(t2)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Zero {t1} [n] dst mem))
(Move {t1} [n] dst p1
mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Zero {t4} [n] p4 _))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& n >= o2 + sizeof(t2)
&& n >= o3 + sizeof(t3)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Zero {t1} [n] dst mem)))
(Move {t1} [n] dst p1
mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Zero {t5} [n] p5 _)))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& alignof(t5) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& n >= o2 + sizeof(t2)
&& n >= o3 + sizeof(t3)
&& n >= o4 + sizeof(t4)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Zero {t1} [n] dst mem))))
(Move {t1} [n] dst p1
mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Store {t5} (OffPtr <tt5> [o5] p5) d4
(Zero {t6} [n] p6 _))))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& alignof(t5) <= alignof(t1)
&& alignof(t6) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
&& n >= o2 + sizeof(t2)
&& n >= o3 + sizeof(t3)
&& n >= o4 + sizeof(t4)
&& n >= o5 + sizeof(t5)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [o5] dst) d4
(Zero {t1} [n] dst mem)))))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Zero {t3} [n] p3 _))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& registerizable(b, t2)
&& n >= o2 + sizeof(t2)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Zero {t1} [n] dst mem))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Zero {t4} [n] p4 _)))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& n >= o2 + sizeof(t2)
&& n >= o3 + sizeof(t3)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Zero {t1} [n] dst mem)))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Zero {t5} [n] p5 _))))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& alignof(t5) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& n >= o2 + sizeof(t2)
&& n >= o3 + sizeof(t3)
&& n >= o4 + sizeof(t4)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Zero {t1} [n] dst mem))))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Store {t5} (OffPtr <tt5> [o5] p5) d4
(Zero {t6} [n] p6 _)))))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6)
&& alignof(t2) <= alignof(t1)
&& alignof(t3) <= alignof(t1)
&& alignof(t4) <= alignof(t1)
&& alignof(t5) <= alignof(t1)
&& alignof(t6) <= alignof(t1)
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
&& n >= o2 + sizeof(t2)
&& n >= o3 + sizeof(t3)
&& n >= o4 + sizeof(t4)
&& n >= o5 + sizeof(t5)
-> (Store {t2} (OffPtr <tt2> [o2] dst) d1
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [o5] dst) d4
(Zero {t1} [n] dst mem)))))
(StaticCall {sym} x) && needRaceCleanup(sym,v) -> x
cmd/compile: move argument stack construction to SSA generation The goal of this change is to move work from walk to SSA, and simplify things along the way. This is hard to accomplish cleanly with small incremental changes, so this large commit message aims to provide a roadmap to the diff. High level description: Prior to this change, walk was responsible for constructing (most of) the stack for function calls. ascompatte gathered variadic arguments into a slice. It also rewrote n.List from a list of arguments to a list of assignments to stack slots. ascompatte was called multiple times to handle the receiver in a method call. reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack. adjustargs then made extra stack space for go/defer args as needed. Node to SSA construction evaluated all the statements in n.List, and issued the function call, assuming that the stack was correctly constructed. Intrinsic calls had to dig around inside n.List to extract the arguments, since intrinsics don't use the stack to make function calls. This change moves stack construction to the SSA construction phase. ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did. It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries. It does not, however, make any assignments to stack slots. Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List. (It would be better to use Ninit instead of List; future work.) During SSA construction, after doing all the temporary assignments in n.List, the function arguments are assigned to stack slots by constructing the appropriate SSA Value, using (*state).storeArg. SSA construction also now handles adjustments for go/defer args. This change also simplifies intrinsic calls, since we no longer need to undo walk's work. Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely. Generated code differences: There were a few optimizations applied along the way, the old way. f(g()) was rewritten to do a block copy of function results to function arguments. And reorder1 avoided introducing the final "save the stack" temporary in n.List. The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed. SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary. The exception was when the temporary's type was not SSA-able; in that case, we got a Move into an autotmp and then an immediate Move onto the stack, with the autotmp never read or used again. This change introduces a new rewrite rule to detect such pointless double Moves and collapse them into a single Move. This is actually more powerful than the original optimization, since the original optimization relied on the imprecise Node.HasCall calculation. The other significant difference in the generated code is that the stack is now constructed completely in SP-offset order. Prior to this change, the stack was constructed somewhat haphazardly: first the final argument that Node.HasCall deemed to require a temporary, then other arguments, then the method receiver, then the defer/go args. SP-offset is probably a good default order. See future work. There are a few minor object file size changes as a result of this change. I investigated some regressions in early versions of this change. One regression (in archive/tar) was the addition of a single CMPQ instruction, which would be eliminated were this TODO from flagalloc to be done: // TODO: Remove original instructions if they are never used. One regression (in text/template) was an ADDQconstmodify that is now a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change in the order in which arguments are written. The argument change order can also now be luckier, so this appears to be a wash. All in all, though there will be minor winners and losers, this change appears to be performance neutral. Future work: Move loading the result of function calls to SSA construction; eliminate OINDREGSP. Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass. Among other benefits, this would make it easier to transition to a new calling convention. This would require rethinking the handling of stack conflicts and is non-trivial. Figure out some clean way to indicate that stack construction Stores/Moves do not alias each other, so that subsequent passes may do things like CSE+tighten shared stack setup, do DSE using non-first Stores, etc. This would allow us to eliminate the minor text/template regression. Possibly make assignments to stack slots not treated as statements by DWARF. Compiler benchmarks: name old time/op new time/op delta Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48) Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50) GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48) Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50) SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50) Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45) GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49) Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43) Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50) XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49) [Geo mean] 382ms 378ms -0.89% name old user-time/op new user-time/op delta Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48) Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49) GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48) Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46) SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50) Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48) GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46) Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48) Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50) XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48) [Geo mean] 482ms 476ms -1.23% name old alloc/op new alloc/op delta Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5) GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5) Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5) SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5) Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5) GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5) Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5) Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5) XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5) [Geo mean] 82.8MB 81.1MB -2.08% name old allocs/op new allocs/op delta Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5) Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5) GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5) Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5) SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5) Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5) GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4) Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5) Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5) XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5) [Geo mean] 815k 804k -1.35% name old object-bytes new object-bytes delta Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5) Unicode 224kB ± 0% 224kB ± 0% ~ (all equal) GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5) Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5) GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5) Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5) Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5) XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5) Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143 Reviewed-on: https://go-review.googlesource.com/c/114797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
// Collapse moving A -> B -> C into just A -> C.
// Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible.
// This happens most commonly when B is an autotmp inserted earlier
// during compilation to ensure correctness.
// Take care that overlapping moves are preserved.
// Restrict this optimization to the stack, to avoid duplicating loads from the heap;
// see CL 145208 for discussion.
(Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
cmd/compile: move argument stack construction to SSA generation The goal of this change is to move work from walk to SSA, and simplify things along the way. This is hard to accomplish cleanly with small incremental changes, so this large commit message aims to provide a roadmap to the diff. High level description: Prior to this change, walk was responsible for constructing (most of) the stack for function calls. ascompatte gathered variadic arguments into a slice. It also rewrote n.List from a list of arguments to a list of assignments to stack slots. ascompatte was called multiple times to handle the receiver in a method call. reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack. adjustargs then made extra stack space for go/defer args as needed. Node to SSA construction evaluated all the statements in n.List, and issued the function call, assuming that the stack was correctly constructed. Intrinsic calls had to dig around inside n.List to extract the arguments, since intrinsics don't use the stack to make function calls. This change moves stack construction to the SSA construction phase. ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did. It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries. It does not, however, make any assignments to stack slots. Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List. (It would be better to use Ninit instead of List; future work.) During SSA construction, after doing all the temporary assignments in n.List, the function arguments are assigned to stack slots by constructing the appropriate SSA Value, using (*state).storeArg. SSA construction also now handles adjustments for go/defer args. This change also simplifies intrinsic calls, since we no longer need to undo walk's work. Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely. Generated code differences: There were a few optimizations applied along the way, the old way. f(g()) was rewritten to do a block copy of function results to function arguments. And reorder1 avoided introducing the final "save the stack" temporary in n.List. The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed. SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary. The exception was when the temporary's type was not SSA-able; in that case, we got a Move into an autotmp and then an immediate Move onto the stack, with the autotmp never read or used again. This change introduces a new rewrite rule to detect such pointless double Moves and collapse them into a single Move. This is actually more powerful than the original optimization, since the original optimization relied on the imprecise Node.HasCall calculation. The other significant difference in the generated code is that the stack is now constructed completely in SP-offset order. Prior to this change, the stack was constructed somewhat haphazardly: first the final argument that Node.HasCall deemed to require a temporary, then other arguments, then the method receiver, then the defer/go args. SP-offset is probably a good default order. See future work. There are a few minor object file size changes as a result of this change. I investigated some regressions in early versions of this change. One regression (in archive/tar) was the addition of a single CMPQ instruction, which would be eliminated were this TODO from flagalloc to be done: // TODO: Remove original instructions if they are never used. One regression (in text/template) was an ADDQconstmodify that is now a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change in the order in which arguments are written. The argument change order can also now be luckier, so this appears to be a wash. All in all, though there will be minor winners and losers, this change appears to be performance neutral. Future work: Move loading the result of function calls to SSA construction; eliminate OINDREGSP. Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass. Among other benefits, this would make it easier to transition to a new calling convention. This would require rethinking the handling of stack conflicts and is non-trivial. Figure out some clean way to indicate that stack construction Stores/Moves do not alias each other, so that subsequent passes may do things like CSE+tighten shared stack setup, do DSE using non-first Stores, etc. This would allow us to eliminate the minor text/template regression. Possibly make assignments to stack slots not treated as statements by DWARF. Compiler benchmarks: name old time/op new time/op delta Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48) Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50) GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48) Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50) SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50) Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45) GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49) Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43) Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50) XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49) [Geo mean] 382ms 378ms -0.89% name old user-time/op new user-time/op delta Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48) Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49) GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48) Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46) SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50) Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48) GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46) Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48) Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50) XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48) [Geo mean] 482ms 476ms -1.23% name old alloc/op new alloc/op delta Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5) GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5) Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5) SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5) Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5) GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5) Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5) Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5) XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5) [Geo mean] 82.8MB 81.1MB -2.08% name old allocs/op new allocs/op delta Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5) Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5) GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5) Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5) SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5) Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5) GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4) Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5) Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5) XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5) [Geo mean] 815k 804k -1.35% name old object-bytes new object-bytes delta Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5) Unicode 224kB ± 0% 224kB ± 0% ~ (all equal) GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5) Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5) GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5) Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5) Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5) XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5) Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143 Reviewed-on: https://go-review.googlesource.com/c/114797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
&& t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq
&& isSamePtr(tmp1, tmp2)
&& isStackPtr(src)
&& disjoint(src, s, tmp2, s)
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
-> (Move {t1} [s] dst src midmem)
cmd/compile: move argument stack construction to SSA generation The goal of this change is to move work from walk to SSA, and simplify things along the way. This is hard to accomplish cleanly with small incremental changes, so this large commit message aims to provide a roadmap to the diff. High level description: Prior to this change, walk was responsible for constructing (most of) the stack for function calls. ascompatte gathered variadic arguments into a slice. It also rewrote n.List from a list of arguments to a list of assignments to stack slots. ascompatte was called multiple times to handle the receiver in a method call. reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack. adjustargs then made extra stack space for go/defer args as needed. Node to SSA construction evaluated all the statements in n.List, and issued the function call, assuming that the stack was correctly constructed. Intrinsic calls had to dig around inside n.List to extract the arguments, since intrinsics don't use the stack to make function calls. This change moves stack construction to the SSA construction phase. ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did. It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries. It does not, however, make any assignments to stack slots. Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List. (It would be better to use Ninit instead of List; future work.) During SSA construction, after doing all the temporary assignments in n.List, the function arguments are assigned to stack slots by constructing the appropriate SSA Value, using (*state).storeArg. SSA construction also now handles adjustments for go/defer args. This change also simplifies intrinsic calls, since we no longer need to undo walk's work. Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely. Generated code differences: There were a few optimizations applied along the way, the old way. f(g()) was rewritten to do a block copy of function results to function arguments. And reorder1 avoided introducing the final "save the stack" temporary in n.List. The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed. SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary. The exception was when the temporary's type was not SSA-able; in that case, we got a Move into an autotmp and then an immediate Move onto the stack, with the autotmp never read or used again. This change introduces a new rewrite rule to detect such pointless double Moves and collapse them into a single Move. This is actually more powerful than the original optimization, since the original optimization relied on the imprecise Node.HasCall calculation. The other significant difference in the generated code is that the stack is now constructed completely in SP-offset order. Prior to this change, the stack was constructed somewhat haphazardly: first the final argument that Node.HasCall deemed to require a temporary, then other arguments, then the method receiver, then the defer/go args. SP-offset is probably a good default order. See future work. There are a few minor object file size changes as a result of this change. I investigated some regressions in early versions of this change. One regression (in archive/tar) was the addition of a single CMPQ instruction, which would be eliminated were this TODO from flagalloc to be done: // TODO: Remove original instructions if they are never used. One regression (in text/template) was an ADDQconstmodify that is now a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change in the order in which arguments are written. The argument change order can also now be luckier, so this appears to be a wash. All in all, though there will be minor winners and losers, this change appears to be performance neutral. Future work: Move loading the result of function calls to SSA construction; eliminate OINDREGSP. Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass. Among other benefits, this would make it easier to transition to a new calling convention. This would require rethinking the handling of stack conflicts and is non-trivial. Figure out some clean way to indicate that stack construction Stores/Moves do not alias each other, so that subsequent passes may do things like CSE+tighten shared stack setup, do DSE using non-first Stores, etc. This would allow us to eliminate the minor text/template regression. Possibly make assignments to stack slots not treated as statements by DWARF. Compiler benchmarks: name old time/op new time/op delta Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48) Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50) GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48) Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50) SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50) Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45) GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49) Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43) Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50) XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49) [Geo mean] 382ms 378ms -0.89% name old user-time/op new user-time/op delta Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48) Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49) GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48) Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46) SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50) Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48) GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46) Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48) Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50) XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48) [Geo mean] 482ms 476ms -1.23% name old alloc/op new alloc/op delta Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5) GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5) Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5) SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5) Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5) GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5) Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5) Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5) XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5) [Geo mean] 82.8MB 81.1MB -2.08% name old allocs/op new allocs/op delta Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5) Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5) GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5) Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5) SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5) Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5) GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4) Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5) Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5) XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5) [Geo mean] 815k 804k -1.35% name old object-bytes new object-bytes delta Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5) Unicode 224kB ± 0% 224kB ± 0% ~ (all equal) GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5) Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5) GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5) Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5) Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5) XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5) Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143 Reviewed-on: https://go-review.googlesource.com/c/114797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
// Same, but for large types that require VarDefs.
(Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
&& t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq
&& isSamePtr(tmp1, tmp2)
&& isStackPtr(src)
&& disjoint(src, s, tmp2, s)
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
-> (Move {t1} [s] dst src midmem)
cmd/compile: move argument stack construction to SSA generation The goal of this change is to move work from walk to SSA, and simplify things along the way. This is hard to accomplish cleanly with small incremental changes, so this large commit message aims to provide a roadmap to the diff. High level description: Prior to this change, walk was responsible for constructing (most of) the stack for function calls. ascompatte gathered variadic arguments into a slice. It also rewrote n.List from a list of arguments to a list of assignments to stack slots. ascompatte was called multiple times to handle the receiver in a method call. reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack. adjustargs then made extra stack space for go/defer args as needed. Node to SSA construction evaluated all the statements in n.List, and issued the function call, assuming that the stack was correctly constructed. Intrinsic calls had to dig around inside n.List to extract the arguments, since intrinsics don't use the stack to make function calls. This change moves stack construction to the SSA construction phase. ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did. It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries. It does not, however, make any assignments to stack slots. Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List. (It would be better to use Ninit instead of List; future work.) During SSA construction, after doing all the temporary assignments in n.List, the function arguments are assigned to stack slots by constructing the appropriate SSA Value, using (*state).storeArg. SSA construction also now handles adjustments for go/defer args. This change also simplifies intrinsic calls, since we no longer need to undo walk's work. Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely. Generated code differences: There were a few optimizations applied along the way, the old way. f(g()) was rewritten to do a block copy of function results to function arguments. And reorder1 avoided introducing the final "save the stack" temporary in n.List. The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed. SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary. The exception was when the temporary's type was not SSA-able; in that case, we got a Move into an autotmp and then an immediate Move onto the stack, with the autotmp never read or used again. This change introduces a new rewrite rule to detect such pointless double Moves and collapse them into a single Move. This is actually more powerful than the original optimization, since the original optimization relied on the imprecise Node.HasCall calculation. The other significant difference in the generated code is that the stack is now constructed completely in SP-offset order. Prior to this change, the stack was constructed somewhat haphazardly: first the final argument that Node.HasCall deemed to require a temporary, then other arguments, then the method receiver, then the defer/go args. SP-offset is probably a good default order. See future work. There are a few minor object file size changes as a result of this change. I investigated some regressions in early versions of this change. One regression (in archive/tar) was the addition of a single CMPQ instruction, which would be eliminated were this TODO from flagalloc to be done: // TODO: Remove original instructions if they are never used. One regression (in text/template) was an ADDQconstmodify that is now a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change in the order in which arguments are written. The argument change order can also now be luckier, so this appears to be a wash. All in all, though there will be minor winners and losers, this change appears to be performance neutral. Future work: Move loading the result of function calls to SSA construction; eliminate OINDREGSP. Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass. Among other benefits, this would make it easier to transition to a new calling convention. This would require rethinking the handling of stack conflicts and is non-trivial. Figure out some clean way to indicate that stack construction Stores/Moves do not alias each other, so that subsequent passes may do things like CSE+tighten shared stack setup, do DSE using non-first Stores, etc. This would allow us to eliminate the minor text/template regression. Possibly make assignments to stack slots not treated as statements by DWARF. Compiler benchmarks: name old time/op new time/op delta Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48) Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50) GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48) Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50) SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50) Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45) GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49) Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43) Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50) XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49) [Geo mean] 382ms 378ms -0.89% name old user-time/op new user-time/op delta Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48) Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49) GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48) Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46) SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50) Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48) GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46) Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48) Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50) XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48) [Geo mean] 482ms 476ms -1.23% name old alloc/op new alloc/op delta Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5) GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5) Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5) SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5) Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5) GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5) Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5) Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5) XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5) [Geo mean] 82.8MB 81.1MB -2.08% name old allocs/op new allocs/op delta Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5) Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5) GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5) Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5) SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5) Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5) GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4) Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5) Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5) XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5) [Geo mean] 815k 804k -1.35% name old object-bytes new object-bytes delta Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5) Unicode 224kB ± 0% 224kB ± 0% ~ (all equal) GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5) Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5) GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5) Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5) Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5) XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5) Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143 Reviewed-on: https://go-review.googlesource.com/c/114797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
// Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go).
// However, this rule is needed to prevent the previous rule from looping forever in such cases.
(Move dst src mem) && isSamePtr(dst, src) -> mem